diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index ec4f8f4be425e..7b26d00eb3d0b 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -15687,16 +15687,20 @@ static SDValue isScalarToVec(SDValue Op) { // On little endian, that's just the corresponding element in the other // half of the vector. On big endian, it is in the same half but right // justified rather than left justified in that half. -static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV, - int LHSMaxIdx, int RHSMinIdx, - int RHSMaxIdx, int HalfVec, - unsigned ValidLaneWidth, - const PPCSubtarget &Subtarget) { - for (int i = 0, e = ShuffV.size(); i < e; i++) { - int Idx = ShuffV[i]; - if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx)) - ShuffV[i] += - Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth; +static void fixupShuffleMaskForPermutedSToV( + SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt, + int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts, + unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) { + int LHSEltFixup = + Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts; + int RHSEltFixup = + Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts; + for (int I = 0, E = ShuffV.size(); I < E; ++I) { + int Idx = ShuffV[I]; + if (Idx >= LHSFirstElt && Idx <= LHSLastElt) + ShuffV[I] += LHSEltFixup; + else if (Idx >= RHSFirstElt && Idx <= RHSLastElt) + ShuffV[I] += RHSEltFixup; } } @@ -15735,6 +15739,51 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, OrigSToV.getOperand(0)); } +static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV, + int HalfVec, int LHSLastElementDefined, + int RHSLastElementDefined) { + for (int Index : ShuffV) { + if (Index < 0) // Skip explicitly undefined mask indices. 
+ continue; + // Handle first input vector of the vector_shuffle. + if ((LHSLastElementDefined >= 0) && (Index < HalfVec) && + (Index > LHSLastElementDefined)) + return false; + // Handle second input vector of the vector_shuffle. + if ((RHSLastElementDefined >= 0) && + (Index > HalfVec + RHSLastElementDefined)) + return false; + } + return true; +} + +static SDValue generateSToVPermutedForVecShuffle( + int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts, + int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode, + SelectionDAG &DAG, const PPCSubtarget &Subtarget) { + EVT VecShuffOperandType = VecShuffOperand.getValueType(); + // Set up the values for the shuffle vector fixup. + NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits(); + // The last element depends on if the input comes from the LHS or RHS. + // + // For example: + // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...) + // + // For the LHS: The last element that comes from the LHS is actually 0, not 3 + // because elements 1 and higher of a scalar_to_vector are undefined. + // For the RHS: The last element that comes from the RHS is actually 5, not 7 + // because elements 1 and higher of a scalar_to_vector are undefined. + // It is also not 4 because the original scalar_to_vector is wider and + // actually contains two i32 elements. + LastElt = (uint64_t)ScalarSize > ShuffleEltWidth + ? 
ScalarSize / ShuffleEltWidth - 1 + FirstElt + : FirstElt; + SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget); + if (SToVPermuted.getValueType() != VecShuffOperandType) + SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted); + return SToVPermuted; +} + // On little endian subtargets, combine shuffles such as: // vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b // into: @@ -15782,36 +15831,25 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN, SDValue SToVLHS = isScalarToVec(LHS); SDValue SToVRHS = isScalarToVec(RHS); if (SToVLHS || SToVRHS) { - // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the - // same type and have differing element sizes, then do not perform - // the following transformation. The current transformation for - // SCALAR_TO_VECTOR assumes that both input vectors have the same - // element size. This will be updated in the future to account for - // differing sizes of the LHS and RHS. - if (SToVLHS && SToVRHS && - (SToVLHS.getValueType().getScalarSizeInBits() != - SToVRHS.getValueType().getScalarSizeInBits())) - return Res; - - int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements() - : SToVRHS.getValueType().getVectorNumElements(); - int NumEltsOut = ShuffV.size(); + EVT VT = SVN->getValueType(0); + uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits(); + int ShuffleNumElts = ShuffV.size(); + int HalfVec = ShuffleNumElts / 2; // The width of the "valid lane" (i.e. the lane that contains the value that // is vectorized) needs to be expressed in terms of the number of elements // of the shuffle. It is thereby the ratio of the values before and after - // any bitcast. - unsigned ValidLaneWidth = - SToVLHS ? 
SToVLHS.getValueType().getScalarSizeInBits() / - LHS.getValueType().getScalarSizeInBits() - : SToVRHS.getValueType().getScalarSizeInBits() / - RHS.getValueType().getScalarSizeInBits(); + // any bitcast, which will be set later on if the LHS or RHS are + // SCALAR_TO_VECTOR nodes. + unsigned LHSNumValidElts = HalfVec; + unsigned RHSNumValidElts = HalfVec; // Initially assume that neither input is permuted. These will be adjusted - // accordingly if either input is. - int LHSMaxIdx = -1; - int RHSMinIdx = -1; - int RHSMaxIdx = -1; - int HalfVec = LHS.getValueType().getVectorNumElements() / 2; + // accordingly if either input is. Note, that -1 means that all elements + // are undefined. + int LHSFirstElt = 0; + int RHSFirstElt = ShuffleNumElts; + int LHSLastElt = -1; + int RHSLastElt = -1; // Get the permuted scalar to vector nodes for the source(s) that come from // ISD::SCALAR_TO_VECTOR. @@ -15819,34 +15857,38 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN, // than 64 bits since for 64-bit elements, all instructions already put // the value into element zero. Since scalar size of LHS and RHS may differ // after isScalarToVec, this should be checked using their own sizes. + int LHSScalarSize = 0; + int RHSScalarSize = 0; if (SToVLHS) { - if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64) + LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits(); + if (!IsLittleEndian && LHSScalarSize >= 64) return Res; - // Set up the values for the shuffle vector fixup. 
- LHSMaxIdx = NumEltsOut / NumEltsIn; - SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget); - if (SToVLHS.getValueType() != LHS.getValueType()) - SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS); - LHS = SToVLHS; } if (SToVRHS) { - if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64) + RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits(); + if (!IsLittleEndian && RHSScalarSize >= 64) return Res; - RHSMinIdx = NumEltsOut; - RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx; - SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget); - if (SToVRHS.getValueType() != RHS.getValueType()) - SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS); - RHS = SToVRHS; } + if (LHSScalarSize != 0) + LHS = generateSToVPermutedForVecShuffle( + LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt, + LHSLastElt, LHS, SToVLHS, DAG, Subtarget); + if (RHSScalarSize != 0) + RHS = generateSToVPermutedForVecShuffle( + RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt, + RHSLastElt, RHS, SToVRHS, DAG, Subtarget); + + if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt)) + return Res; // Fix up the shuffle mask to reflect where the desired element actually is. // The minimum and maximum indices that correspond to element zero for both // the LHS and RHS are computed and will control which shuffle mask entries // are to be changed. For example, if the RHS is permuted, any shuffle mask - // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted. - fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx, - HalfVec, ValidLaneWidth, Subtarget); + // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted. + fixupShuffleMaskForPermutedSToV( + ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec, + LHSNumValidElts, RHSNumValidElts, Subtarget); Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV); // We may have simplified away the shuffle. 
We won't be able to do anything diff --git a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll index 8f12b182283f5..656bc3661178a 100644 --- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll +++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll @@ -2499,11 +2499,9 @@ define <2 x i64> @buildi2(i64 %arg, i32 %arg1) { ; ; CHECK-LE-LABEL: buildi2: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mtfprd f0, r4 +; CHECK-LE-NEXT: mtfprwz f0, r4 ; CHECK-LE-NEXT: mtfprd f1, r3 -; CHECK-LE-NEXT: xxswapd vs0, vs0 -; CHECK-LE-NEXT: xxswapd v2, vs1 -; CHECK-LE-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-NEXT: blr ; ; CHECK-AIX-LABEL: buildi2: diff --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll index 31d0960e19f4e..3ab49cd39f8d8 100644 --- a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll +++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll @@ -266,56 +266,54 @@ entry: define <16 x i8> @test_v16i8_v8i16(i16 %arg, i8 %arg1) { ; CHECK-LE-P8-LABEL: test_v16i8_v8i16: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: mtfprd f0, r4 -; CHECK-LE-P8-NEXT: xxswapd v2, vs0 -; CHECK-LE-P8-NEXT: mtfprd f0, r3 -; CHECK-LE-P8-NEXT: xxswapd v3, vs0 -; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P8-NEXT: mtvsrd v2, r4 +; CHECK-LE-P8-NEXT: mtvsrd v3, r3 +; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v16i8_v8i16: ; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: mtfprd f0, r4 -; CHECK-LE-P9-NEXT: xxswapd v2, vs0 -; CHECK-LE-P9-NEXT: mtfprd f0, r3 -; CHECK-LE-P9-NEXT: xxswapd v3, vs0 -; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P9-NEXT: mtvsrd v2, r4 +; CHECK-LE-P9-NEXT: mtvsrd v3, r3 +; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v16i8_v8i16: ; CHECK-BE-P8: # %bb.0: # 
%entry -; CHECK-BE-P8-NEXT: sldi r4, r4, 56 -; CHECK-BE-P8-NEXT: sldi r3, r3, 48 -; CHECK-BE-P8-NEXT: mtvsrd v2, r4 -; CHECK-BE-P8-NEXT: mtvsrd v3, r3 -; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P8-NEXT: mtvsrwz v3, r3 +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-BE-P8-NEXT: mtvsrwz v2, r4 +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v16i8_v8i16: ; CHECK-BE-P9: # %bb.0: # %entry -; CHECK-BE-P9-NEXT: sldi r4, r4, 56 -; CHECK-BE-P9-NEXT: sldi r3, r3, 48 -; CHECK-BE-P9-NEXT: mtvsrd v2, r4 -; CHECK-BE-P9-NEXT: mtvsrd v3, r3 -; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: mtvsrwz v2, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-BE-P9-NEXT: mtfprwz f0, r4 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l +; CHECK-BE-P9-NEXT: lxv vs1, 0(r3) +; CHECK-BE-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v16i8_v8i16: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 56 -; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 -; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r4 -; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 -; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r3 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C3(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v16i8_v8i16: ; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: sldi r4, r4, 56 -; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48 -; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r4 -; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3 -; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C2(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4 +; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) +; 
CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v16i8_v8i16: @@ -348,56 +346,54 @@ entry: define <16 x i8> @test_v8i16_v16i8(i16 %arg, i8 %arg1) { ; CHECK-LE-P8-LABEL: test_v8i16_v16i8: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: mtfprd f0, r4 -; CHECK-LE-P8-NEXT: xxswapd v2, vs0 -; CHECK-LE-P8-NEXT: mtfprd f0, r3 -; CHECK-LE-P8-NEXT: xxswapd v3, vs0 -; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P8-NEXT: mtvsrd v2, r4 +; CHECK-LE-P8-NEXT: mtvsrd v3, r3 +; CHECK-LE-P8-NEXT: vmrghh v2, v2, v3 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v8i16_v16i8: ; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: mtfprd f0, r4 -; CHECK-LE-P9-NEXT: xxswapd v2, vs0 -; CHECK-LE-P9-NEXT: mtfprd f0, r3 -; CHECK-LE-P9-NEXT: xxswapd v3, vs0 -; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P9-NEXT: mtvsrd v2, r4 +; CHECK-LE-P9-NEXT: mtvsrd v3, r3 +; CHECK-LE-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v8i16_v16i8: ; CHECK-BE-P8: # %bb.0: # %entry -; CHECK-BE-P8-NEXT: sldi r4, r4, 56 -; CHECK-BE-P8-NEXT: sldi r3, r3, 48 -; CHECK-BE-P8-NEXT: mtvsrd v2, r4 -; CHECK-BE-P8-NEXT: mtvsrd v3, r3 -; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: mtvsrwz v3, r3 +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-BE-P8-NEXT: mtvsrwz v2, r4 +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI4_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v8i16_v16i8: ; CHECK-BE-P9: # %bb.0: # %entry -; CHECK-BE-P9-NEXT: sldi r4, r4, 56 -; CHECK-BE-P9-NEXT: sldi r3, r3, 48 -; CHECK-BE-P9-NEXT: mtvsrd v2, r4 -; CHECK-BE-P9-NEXT: mtvsrd v3, r3 -; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-BE-P9-NEXT: mtvsrwz v2, r4 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI4_0@toc@l +; CHECK-BE-P9-NEXT: lxv vs1, 0(r3) +; CHECK-BE-P9-NEXT: 
xxperm v2, vs0, vs1 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v8i16_v16i8: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 56 -; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 -; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r4 -; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 -; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r3 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C4(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v8i16_v16i8: ; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: sldi r4, r4, 56 -; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48 -; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r4 -; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3 -; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v8i16_v16i8: @@ -472,7 +468,7 @@ define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) { ; CHECK-AIX-64-P8-LABEL: test_none_v8i16: ; CHECK-AIX-64-P8: # %bb.0: # %entry ; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r3 -; CHECK-AIX-64-P8-NEXT: ld r3, L..C3(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C5(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: lxvw4x v2, 0, r4 ; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3 ; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4 @@ -481,7 +477,7 @@ define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) { ; CHECK-AIX-64-P9-LABEL: test_none_v8i16: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r3 -; CHECK-AIX-64-P9-NEXT: ld r3, L..C2(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C4(r2) # %const.0 ; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r4) ; CHECK-AIX-64-P9-NEXT: lxv 
vs1, 0(r3) ; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1 @@ -545,7 +541,7 @@ define <8 x i16> @test_v8i16_none(<8 x i16> %a, i16 %b) { ; ; CHECK-AIX-64-P8-LABEL: test_v8i16_none: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: ld r4, L..C4(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r4, L..C6(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 ; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 ; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 @@ -580,53 +576,54 @@ entry: define <16 x i8> @test_v16i8_v4i32(i8 %arg, i32 %arg1, <16 x i8> %a, <4 x i32> %b) { ; CHECK-LE-P8-LABEL: test_v16i8_v4i32: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: mtfprd f0, r3 -; CHECK-LE-P8-NEXT: xxswapd v2, vs0 -; CHECK-LE-P8-NEXT: mtfprd f0, r4 -; CHECK-LE-P8-NEXT: xxswapd v3, vs0 -; CHECK-LE-P8-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P8-NEXT: mtvsrd v2, r3 +; CHECK-LE-P8-NEXT: mtvsrwz v3, r4 +; CHECK-LE-P8-NEXT: vmrghb v2, v3, v2 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v16i8_v4i32: ; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: mtfprd f0, r3 -; CHECK-LE-P9-NEXT: mtvsrws v3, r4 -; CHECK-LE-P9-NEXT: xxswapd v2, vs0 -; CHECK-LE-P9-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P9-NEXT: mtvsrd v2, r3 +; CHECK-LE-P9-NEXT: mtvsrwz v3, r4 +; CHECK-LE-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v16i8_v4i32: ; CHECK-BE-P8: # %bb.0: # %entry -; CHECK-BE-P8-NEXT: sldi r3, r3, 56 -; CHECK-BE-P8-NEXT: mtvsrd v2, r3 -; CHECK-BE-P8-NEXT: sldi r3, r4, 32 -; CHECK-BE-P8-NEXT: mtvsrd v3, r3 -; CHECK-BE-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-P8-NEXT: mtvsrwz v2, r3 +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI7_0@toc@ha +; CHECK-BE-P8-NEXT: mtvsrwz v3, r4 +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI7_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v16i8_v4i32: ; CHECK-BE-P9: # %bb.0: # %entry -; CHECK-BE-P9-NEXT: sldi r3, r3, 56 -; CHECK-BE-P9-NEXT: mtvsrws v3, r4 -; 
CHECK-BE-P9-NEXT: mtvsrd v2, r3 -; CHECK-BE-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI7_0@toc@ha +; CHECK-BE-P9-NEXT: mtvsrwz v2, r4 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI7_0@toc@l +; CHECK-BE-P9-NEXT: lxv vs1, 0(r3) +; CHECK-BE-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v16i8_v4i32: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56 -; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3 -; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 32 -; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 -; CHECK-AIX-64-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C7(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v16i8_v4i32: ; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56 -; CHECK-AIX-64-P9-NEXT: mtvsrws v3, r4 -; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r3 -; CHECK-AIX-64-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v16i8_v4i32: @@ -660,53 +657,54 @@ entry: define <16 x i8> @test_v4i32_v16i8(i32 %arg, i8 %arg1) { ; CHECK-LE-P8-LABEL: test_v4i32_v16i8: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: mtfprd f0, r4 -; CHECK-LE-P8-NEXT: xxswapd v2, vs0 -; CHECK-LE-P8-NEXT: mtfprd f0, r3 -; CHECK-LE-P8-NEXT: xxswapd v3, vs0 -; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P8-NEXT: mtvsrd v2, r4 +; CHECK-LE-P8-NEXT: mtvsrwz v3, r3 +; CHECK-LE-P8-NEXT: vmrghh v2, v2, v3 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v4i32_v16i8: ; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: mtfprd f0, r4 
-; CHECK-LE-P9-NEXT: mtvsrws v3, r3 -; CHECK-LE-P9-NEXT: xxswapd v2, vs0 -; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P9-NEXT: mtvsrd v2, r4 +; CHECK-LE-P9-NEXT: mtvsrwz v3, r3 +; CHECK-LE-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v4i32_v16i8: ; CHECK-BE-P8: # %bb.0: # %entry -; CHECK-BE-P8-NEXT: sldi r4, r4, 56 -; CHECK-BE-P8-NEXT: sldi r3, r3, 32 -; CHECK-BE-P8-NEXT: mtvsrd v2, r4 -; CHECK-BE-P8-NEXT: mtvsrd v3, r3 -; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: mtvsrwz v3, r3 +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI8_0@toc@ha +; CHECK-BE-P8-NEXT: mtvsrwz v2, r4 +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI8_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v4i32_v16i8: ; CHECK-BE-P9: # %bb.0: # %entry -; CHECK-BE-P9-NEXT: sldi r4, r4, 56 -; CHECK-BE-P9-NEXT: mtvsrws v3, r3 -; CHECK-BE-P9-NEXT: mtvsrd v2, r4 -; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI8_0@toc@ha +; CHECK-BE-P9-NEXT: mtvsrwz v2, r4 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI8_0@toc@l +; CHECK-BE-P9-NEXT: lxv vs1, 0(r3) +; CHECK-BE-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v4i32_v16i8: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 56 -; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 32 -; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r4 -; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 -; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r3 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C8(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v4i32_v16i8: ; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: sldi r4, r4, 56 -; CHECK-AIX-64-P9-NEXT: mtvsrws v3, r3 -; CHECK-AIX-64-P9-NEXT: 
mtvsrd v2, r4 -; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C6(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v4i32_v16i8: @@ -781,9 +779,9 @@ define <4 x i32> @test_none_v4i32(<4 x i32> %a, i64 %b) { ; ; CHECK-AIX-64-P8-LABEL: test_none_v4i32: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: ld r4, L..C5(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r4, L..C9(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 -; CHECK-AIX-64-P8-NEXT: ld r3, L..C6(r2) # %const.1 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C10(r2) # %const.1 ; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 ; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3 ; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r3 @@ -875,7 +873,7 @@ define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocaptu ; CHECK-AIX-64-P8: # %bb.0: # %entry ; CHECK-AIX-64-P8-NEXT: lbzx r4, 0, r4 ; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r3 -; CHECK-AIX-64-P8-NEXT: ld r3, L..C7(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C11(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4 ; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3 ; CHECK-AIX-64-P8-NEXT: vspltb v2, v2, 7 @@ -885,7 +883,7 @@ define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocaptu ; CHECK-AIX-64-P9-LABEL: test_v4i32_none: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C7(r2) # %const.0 ; CHECK-AIX-64-P9-NEXT: lxsibzx v2, 0, r4 ; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) ; CHECK-AIX-64-P9-NEXT: vspltb v2, v2, 7 @@ -928,20 +926,16 @@ entry: define <16 x i8> @test_v16i8_v2i64(i8 %arg, i64 %arg1, <16 x i8> %a, <2 x i64> %b) { ; CHECK-LE-P8-LABEL: test_v16i8_v2i64: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: mtfprd f0, 
r3 -; CHECK-LE-P8-NEXT: xxswapd v2, vs0 -; CHECK-LE-P8-NEXT: mtfprd f0, r4 -; CHECK-LE-P8-NEXT: xxswapd v3, vs0 -; CHECK-LE-P8-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P8-NEXT: mtvsrd v2, r3 +; CHECK-LE-P8-NEXT: mtvsrd v3, r4 +; CHECK-LE-P8-NEXT: vmrghb v2, v3, v2 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v16i8_v2i64: ; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: mtfprd f0, r3 -; CHECK-LE-P9-NEXT: xxswapd v2, vs0 -; CHECK-LE-P9-NEXT: mtfprd f0, r4 -; CHECK-LE-P9-NEXT: xxswapd v3, vs0 -; CHECK-LE-P9-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P9-NEXT: mtvsrd v2, r3 +; CHECK-LE-P9-NEXT: mtvsrd v3, r4 +; CHECK-LE-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v16i8_v2i64: @@ -1007,20 +1001,16 @@ entry: define <16 x i8> @test_v2i64_v16i8(i64 %arg, i8 %arg1) { ; CHECK-LE-P8-LABEL: test_v2i64_v16i8: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: mtfprd f0, r4 -; CHECK-LE-P8-NEXT: xxswapd v2, vs0 -; CHECK-LE-P8-NEXT: mtfprd f0, r3 -; CHECK-LE-P8-NEXT: xxswapd v3, vs0 -; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P8-NEXT: mtvsrd v2, r4 +; CHECK-LE-P8-NEXT: mtvsrd v3, r3 +; CHECK-LE-P8-NEXT: vmrghh v2, v2, v3 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v2i64_v16i8: ; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: mtfprd f0, r4 -; CHECK-LE-P9-NEXT: xxswapd v2, vs0 -; CHECK-LE-P9-NEXT: mtfprd f0, r3 -; CHECK-LE-P9-NEXT: xxswapd v3, vs0 -; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P9-NEXT: mtvsrd v2, r4 +; CHECK-LE-P9-NEXT: mtvsrd v3, r3 +; CHECK-LE-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v2i64_v16i8: @@ -1392,7 +1382,7 @@ define <16 x i8> @test_v8i16_v8i16rhs(i16 %arg, i16 %arg1) { ; ; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16rhs: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: ld r5, L..C8(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r5, L..C12(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r4 ; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 ; CHECK-AIX-64-P8-NEXT: 
lxvw4x v2, 0, r5 @@ -1401,7 +1391,7 @@ define <16 x i8> @test_v8i16_v8i16rhs(i16 %arg, i16 %arg1) { ; ; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16rhs: ; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: ld r5, L..C4(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: ld r5, L..C8(r2) # %const.0 ; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r4 ; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r3 ; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r5) @@ -1439,53 +1429,54 @@ entry: define <16 x i8> @test_v8i16_v4i32(<8 x i16> %a, <4 x i32> %b, i16 %arg, i32 %arg1) { ; CHECK-LE-P8-LABEL: test_v8i16_v4i32: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: mtfprd f0, r7 -; CHECK-LE-P8-NEXT: xxswapd v2, vs0 -; CHECK-LE-P8-NEXT: mtfprd f0, r8 -; CHECK-LE-P8-NEXT: xxswapd v3, vs0 -; CHECK-LE-P8-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P8-NEXT: mtvsrd v2, r7 +; CHECK-LE-P8-NEXT: mtvsrwz v3, r8 +; CHECK-LE-P8-NEXT: vmrghb v2, v3, v2 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v8i16_v4i32: ; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: mtfprd f0, r7 -; CHECK-LE-P9-NEXT: mtvsrws v3, r8 -; CHECK-LE-P9-NEXT: xxswapd v2, vs0 -; CHECK-LE-P9-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P9-NEXT: mtvsrd v2, r7 +; CHECK-LE-P9-NEXT: mtvsrwz v3, r8 +; CHECK-LE-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v8i16_v4i32: ; CHECK-BE-P8: # %bb.0: # %entry -; CHECK-BE-P8-NEXT: sldi r3, r7, 48 -; CHECK-BE-P8-NEXT: mtvsrd v2, r3 -; CHECK-BE-P8-NEXT: sldi r3, r8, 32 -; CHECK-BE-P8-NEXT: mtvsrd v3, r3 -; CHECK-BE-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI17_0@toc@ha +; CHECK-BE-P8-NEXT: mtvsrwz v2, r7 +; CHECK-BE-P8-NEXT: mtvsrwz v3, r8 +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI17_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v8i16_v4i32: ; CHECK-BE-P9: # %bb.0: # %entry -; CHECK-BE-P9-NEXT: sldi r3, r7, 48 -; CHECK-BE-P9-NEXT: mtvsrws v3, r8 -; CHECK-BE-P9-NEXT: mtvsrd v2, r3 -; 
CHECK-BE-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI17_0@toc@ha +; CHECK-BE-P9-NEXT: mtfprwz f0, r7 +; CHECK-BE-P9-NEXT: mtvsrwz v2, r8 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI17_0@toc@l +; CHECK-BE-P9-NEXT: lxv vs1, 0(r3) +; CHECK-BE-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 -; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3 -; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 32 -; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 -; CHECK-AIX-64-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C13(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32: ; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48 -; CHECK-AIX-64-P9-NEXT: mtvsrws v3, r4 -; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r3 -; CHECK-AIX-64-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C9(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32: @@ -1519,20 +1510,16 @@ entry: define <16 x i8> @test_v8i16_v2i64(<8 x i16> %a, <2 x i64> %b, i16 %arg, i64 %arg1) { ; CHECK-LE-P8-LABEL: test_v8i16_v2i64: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: mtfprd f0, r7 -; CHECK-LE-P8-NEXT: xxswapd v2, vs0 -; CHECK-LE-P8-NEXT: mtfprd f0, r8 -; CHECK-LE-P8-NEXT: xxswapd v3, vs0 -; CHECK-LE-P8-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P8-NEXT: mtvsrd v2, r7 +; CHECK-LE-P8-NEXT: mtvsrd v3, r8 +; CHECK-LE-P8-NEXT: vmrghb v2, v3, v2 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v8i16_v2i64: ; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: mtfprd f0, r7 -; 
CHECK-LE-P9-NEXT: xxswapd v2, vs0 -; CHECK-LE-P9-NEXT: mtfprd f0, r8 -; CHECK-LE-P9-NEXT: xxswapd v3, vs0 -; CHECK-LE-P9-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P9-NEXT: mtvsrd v2, r7 +; CHECK-LE-P9-NEXT: mtvsrd v3, r8 +; CHECK-LE-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v8i16_v2i64: @@ -1669,53 +1656,54 @@ entry: define <16 x i8> @test_v4i32_v8i16(i32 %arg, i16 %arg1) { ; CHECK-LE-P8-LABEL: test_v4i32_v8i16: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: mtfprd f0, r3 -; CHECK-LE-P8-NEXT: xxswapd v2, vs0 -; CHECK-LE-P8-NEXT: mtfprd f0, r4 -; CHECK-LE-P8-NEXT: xxswapd v3, vs0 -; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P8-NEXT: mtvsrwz v2, r3 +; CHECK-LE-P8-NEXT: mtvsrd v3, r4 +; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v4i32_v8i16: ; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: mtfprd f0, r4 -; CHECK-LE-P9-NEXT: mtvsrws v2, r3 -; CHECK-LE-P9-NEXT: xxswapd v3, vs0 -; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P9-NEXT: mtvsrwz v2, r3 +; CHECK-LE-P9-NEXT: mtvsrd v3, r4 +; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v4i32_v8i16: ; CHECK-BE-P8: # %bb.0: # %entry -; CHECK-BE-P8-NEXT: sldi r3, r3, 32 -; CHECK-BE-P8-NEXT: mtvsrd v2, r3 -; CHECK-BE-P8-NEXT: sldi r3, r4, 48 -; CHECK-BE-P8-NEXT: mtvsrd v3, r3 -; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P8-NEXT: mtvsrwz v2, r3 +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI20_0@toc@ha +; CHECK-BE-P8-NEXT: mtvsrwz v3, r4 +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI20_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v4i32_v8i16: ; CHECK-BE-P9: # %bb.0: # %entry -; CHECK-BE-P9-NEXT: mtvsrws v2, r3 -; CHECK-BE-P9-NEXT: sldi r3, r4, 48 -; CHECK-BE-P9-NEXT: mtvsrd v3, r3 -; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI20_0@toc@ha +; 
CHECK-BE-P9-NEXT: mtvsrwz v2, r4 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI20_0@toc@l +; CHECK-BE-P9-NEXT: lxv vs1, 0(r3) +; CHECK-BE-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 32 -; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3 -; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48 -; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 -; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C14(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16: ; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: mtvsrws v2, r3 -; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48 -; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3 -; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C10(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16: @@ -1824,18 +1812,15 @@ define <16 x i8> @test_v2i64_v4i32(i64 %arg, i32 %arg1, <2 x i64> %a, <4 x i32> ; CHECK-LE-P8-LABEL: test_v2i64_v4i32: ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: mtfprd f0, r3 -; CHECK-LE-P8-NEXT: mtfprd f1, r4 -; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 -; CHECK-LE-P8-NEXT: xxmrglw v2, vs1, vs0 +; CHECK-LE-P8-NEXT: mtfprwz f1, r4 +; CHECK-LE-P8-NEXT: xxmrghw v2, vs1, vs0 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v2i64_v4i32: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: mtfprd f0, r3 -; CHECK-LE-P9-NEXT: mtvsrws vs1, r4 -; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P9-NEXT: xxmrglw v2, vs1, vs0 +; CHECK-LE-P9-NEXT: mtfprwz f1, r4 +; 
CHECK-LE-P9-NEXT: xxmrghw v2, vs1, vs0 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v2i64_v4i32: @@ -1899,20 +1884,16 @@ entry: define <16 x i8> @test_v2i64_v8i16(i64 %arg, i16 %arg1) { ; CHECK-LE-P8-LABEL: test_v2i64_v8i16: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: mtfprd f0, r3 -; CHECK-LE-P8-NEXT: xxswapd v2, vs0 -; CHECK-LE-P8-NEXT: mtfprd f0, r4 -; CHECK-LE-P8-NEXT: xxswapd v3, vs0 -; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P8-NEXT: mtvsrd v2, r3 +; CHECK-LE-P8-NEXT: mtvsrd v3, r4 +; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v2i64_v8i16: ; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: mtfprd f0, r3 -; CHECK-LE-P9-NEXT: xxswapd v2, vs0 -; CHECK-LE-P9-NEXT: mtfprd f0, r4 -; CHECK-LE-P9-NEXT: xxswapd v3, vs0 -; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P9-NEXT: mtvsrd v2, r3 +; CHECK-LE-P9-NEXT: mtvsrd v3, r4 +; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v2i64_v8i16: @@ -1978,27 +1959,23 @@ entry: define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { ; CHECK-LE-P8-LABEL: test_v4i32_v2i64: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3 -; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI24_0@toc@ha -; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI24_0@toc@l -; CHECK-LE-P8-NEXT: xxswapd v2, f0 -; CHECK-LE-P8-NEXT: lfdx f0, 0, r4 -; CHECK-LE-P8-NEXT: xxswapd v3, f0 -; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-LE-P8-NEXT: xxswapd v4, vs0 -; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI24_0@toc@ha +; CHECK-LE-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-LE-P8-NEXT: lxsdx v4, 0, r4 +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI24_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v4, v3, v2 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v4i32_v2i64: ; CHECK-LE-P9: # %bb.0: # %entry -; 
CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3 ; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI24_0@toc@ha -; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI24_0@toc@l -; CHECK-LE-P9-NEXT: xxswapd v2, f0 ; CHECK-LE-P9-NEXT: lfd f0, 0(r4) -; CHECK-LE-P9-NEXT: xxswapd v3, f0 -; CHECK-LE-P9-NEXT: lxv vs0, 0(r3) -; CHECK-LE-P9-NEXT: xxperm v2, v3, vs0 +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI24_0@toc@l +; CHECK-LE-P9-NEXT: lxv vs1, 0(r3) +; CHECK-LE-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v4i32_v2i64: @@ -2026,7 +2003,7 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt ; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64: ; CHECK-AIX-64-P8: # %bb.0: # %entry ; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-64-P8-NEXT: ld r3, L..C9(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C15(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4 ; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3 ; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1 @@ -2036,7 +2013,7 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt ; CHECK-AIX-64-P9-LABEL: test_v4i32_v2i64: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C11(r2) # %const.0 ; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r4) ; CHECK-AIX-64-P9-NEXT: xxsldwi vs0, f0, f0, 1 ; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) diff --git a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll index 56c8c128ba9f4..fcfcda586694d 100644 --- a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll +++ b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll @@ -30,42 +30,42 @@ define <2 x i64> @test_v16i8_v16i8(i8 %arg1, i8 %arg) { ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: mtfprd f0, r3 ; CHECK-LE-P8-NEXT: mtfprd f1, r4 -; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0 +; 
CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v16i8_v16i8: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: mtfprd f0, r3 ; CHECK-LE-P9-NEXT: mtfprd f1, r4 -; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v16i8_v16i8: ; CHECK-BE-P8: # %bb.0: # %entry ; CHECK-BE-P8-NEXT: mtfprwz f0, r3 ; CHECK-BE-P8-NEXT: mtfprwz f1, r4 -; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v16i8_v16i8: ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: mtfprwz f0, r3 ; CHECK-BE-P9-NEXT: mtfprwz f1, r4 -; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v16i8_v16i8: ; CHECK-AIX-64-P8: # %bb.0: # %entry ; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 ; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4 -; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v16i8_v16i8: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 ; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4 -; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v16i8_v16i8: @@ -102,42 +102,42 @@ define <2 x i64> @test_none_v16i8(i8 %arg1, ptr nocapture noundef readonly %b) { ; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-LE-P8-NEXT: xxswapd v2, vs0 ; CHECK-LE-P8-NEXT: mtfprd f0, r3 -; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P8-NEXT: xxpermdi v2, vs0, v2, 1 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_none_v16i8: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: lxv v2, 0(r4) ; CHECK-LE-P9-NEXT: mtfprd f0, r3 -; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P9-NEXT: xxpermdi v2, vs0, v2, 1 ; CHECK-LE-P9-NEXT: blr ; ; 
CHECK-BE-P8-LABEL: test_none_v16i8: ; CHECK-BE-P8: # %bb.0: # %entry ; CHECK-BE-P8-NEXT: mtfprwz f0, r3 ; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4 -; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P8-NEXT: xxpermdi v2, v2, vs0, 1 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_none_v16i8: ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: lxv v2, 0(r4) ; CHECK-BE-P9-NEXT: mtfprwz f0, r3 -; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P9-NEXT: xxpermdi v2, v2, vs0, 1 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_none_v16i8: ; CHECK-AIX-64-P8: # %bb.0: # %entry ; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 ; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4 -; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P8-NEXT: xxpermdi v2, v2, vs0, 1 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_none_v16i8: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) ; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 -; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P9-NEXT: xxpermdi v2, v2, vs0, 1 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_none_v16i8: @@ -170,42 +170,42 @@ define <2 x i64> @test_v16i8_none(i8 %arg1, ptr nocapture noundef readonly %b) { ; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-LE-P8-NEXT: xxswapd v2, vs0 ; CHECK-LE-P8-NEXT: mtfprd f0, r3 -; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P8-NEXT: xxpermdi v2, v2, vs0, 2 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v16i8_none: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: lxv v2, 0(r4) ; CHECK-LE-P9-NEXT: mtfprd f0, r3 -; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P9-NEXT: xxpermdi v2, v2, vs0, 2 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v16i8_none: ; CHECK-BE-P8: # %bb.0: # %entry ; CHECK-BE-P8-NEXT: mtfprwz f0, r3 ; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4 -; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-BE-P8-NEXT: xxpermdi v2, vs0, v2, 2 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v16i8_none: ; CHECK-BE-P9: # 
%bb.0: # %entry ; CHECK-BE-P9-NEXT: lxv v2, 0(r4) ; CHECK-BE-P9-NEXT: mtfprwz f0, r3 -; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-BE-P9-NEXT: xxpermdi v2, vs0, v2, 2 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v16i8_none: ; CHECK-AIX-64-P8: # %bb.0: # %entry ; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 ; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4 -; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-64-P8-NEXT: xxpermdi v2, vs0, v2, 2 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v16i8_none: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) ; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 -; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-64-P9-NEXT: xxpermdi v2, vs0, v2, 2 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v16i8_none: @@ -237,54 +237,42 @@ define <2 x i64> @test_v16i8_v8i16(i8 %arg1, i16 %arg) { ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: mtfprd f0, r3 ; CHECK-LE-P8-NEXT: mtfprd f1, r4 -; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 -; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v16i8_v8i16: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: mtfprd f0, r3 ; CHECK-LE-P9-NEXT: mtfprd f1, r4 -; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P9-NEXT: xxswapd vs1, vs1 -; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v16i8_v8i16: ; CHECK-BE-P8: # %bb.0: # %entry -; CHECK-BE-P8-NEXT: sldi r3, r3, 56 -; CHECK-BE-P8-NEXT: mtfprd f0, r3 -; CHECK-BE-P8-NEXT: sldi r3, r4, 48 -; CHECK-BE-P8-NEXT: mtfprd f1, r3 -; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: mtfprwz f1, r4 +; CHECK-BE-P8-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v16i8_v8i16: ; CHECK-BE-P9: # %bb.0: # %entry -; CHECK-BE-P9-NEXT: sldi r3, r3, 
56 -; CHECK-BE-P9-NEXT: mtfprd f0, r3 -; CHECK-BE-P9-NEXT: sldi r3, r4, 48 -; CHECK-BE-P9-NEXT: mtfprd f1, r3 -; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: mtfprwz f1, r4 +; CHECK-BE-P9-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v16i8_v8i16: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56 -; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 -; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48 -; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3 -; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v16i8_v8i16: ; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56 -; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3 -; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48 -; CHECK-AIX-64-P9-NEXT: mtfprd f1, r3 -; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P9-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v16i8_v8i16: @@ -320,54 +308,42 @@ define <2 x i64> @test_v8i16_v16i8(i8 %arg1, i16 %arg) { ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: mtfprd f0, r3 ; CHECK-LE-P8-NEXT: mtfprd f1, r4 -; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 -; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, vs1 +; CHECK-LE-P8-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v8i16_v16i8: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: mtfprd f0, r3 ; CHECK-LE-P9-NEXT: mtfprd f1, r4 -; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P9-NEXT: xxswapd vs1, vs1 -; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, vs1 +; CHECK-LE-P9-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v8i16_v16i8: ; CHECK-BE-P8: # %bb.0: # %entry -; 
CHECK-BE-P8-NEXT: sldi r3, r3, 56 -; CHECK-BE-P8-NEXT: mtfprd f0, r3 -; CHECK-BE-P8-NEXT: sldi r3, r4, 48 -; CHECK-BE-P8-NEXT: mtfprd f1, r3 -; CHECK-BE-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: mtfprwz f1, r4 +; CHECK-BE-P8-NEXT: xxmrgld v2, vs1, vs0 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v8i16_v16i8: ; CHECK-BE-P9: # %bb.0: # %entry -; CHECK-BE-P9-NEXT: sldi r3, r3, 56 -; CHECK-BE-P9-NEXT: mtfprd f0, r3 -; CHECK-BE-P9-NEXT: sldi r3, r4, 48 -; CHECK-BE-P9-NEXT: mtfprd f1, r3 -; CHECK-BE-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: mtfprwz f1, r4 +; CHECK-BE-P9-NEXT: xxmrgld v2, vs1, vs0 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v8i16_v16i8: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56 -; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 -; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48 -; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3 -; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrgld v2, vs1, vs0 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v8i16_v16i8: ; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56 -; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3 -; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48 -; CHECK-AIX-64-P9-NEXT: mtfprd f1, r3 -; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P9-NEXT: xxmrgld v2, vs1, vs0 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v8i16_v16i8: @@ -404,42 +380,42 @@ define <2 x i64> @test_v8i16_none(i16 %arg1, ptr nocapture noundef readonly %b) ; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-LE-P8-NEXT: xxswapd v2, vs0 ; CHECK-LE-P8-NEXT: mtfprd f0, r3 -; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P8-NEXT: xxpermdi v2, v2, vs0, 2 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: 
test_v8i16_none: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: lxv v2, 0(r4) ; CHECK-LE-P9-NEXT: mtfprd f0, r3 -; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P9-NEXT: xxpermdi v2, v2, vs0, 2 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v8i16_none: ; CHECK-BE-P8: # %bb.0: # %entry ; CHECK-BE-P8-NEXT: mtfprwz f0, r3 ; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4 -; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-BE-P8-NEXT: xxpermdi v2, vs0, v2, 2 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v8i16_none: ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: lxv v2, 0(r4) ; CHECK-BE-P9-NEXT: mtfprwz f0, r3 -; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-BE-P9-NEXT: xxpermdi v2, vs0, v2, 2 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v8i16_none: ; CHECK-AIX-64-P8: # %bb.0: # %entry ; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 ; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4 -; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-64-P8-NEXT: xxpermdi v2, vs0, v2, 2 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v8i16_none: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) ; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 -; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-64-P9-NEXT: xxpermdi v2, vs0, v2, 2 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v8i16_none: @@ -472,42 +448,42 @@ define <2 x i64> @test_none_v8i16(i16 %arg1, ptr nocapture noundef readonly %b) ; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-LE-P8-NEXT: xxswapd v2, vs0 ; CHECK-LE-P8-NEXT: mtfprd f0, r3 -; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P8-NEXT: xxpermdi v2, vs0, v2, 1 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_none_v8i16: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: lxv v2, 0(r4) ; CHECK-LE-P9-NEXT: mtfprd f0, r3 -; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P9-NEXT: xxpermdi v2, vs0, v2, 1 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_none_v8i16: ; CHECK-BE-P8: # %bb.0: # %entry ; 
CHECK-BE-P8-NEXT: mtfprwz f0, r3 ; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4 -; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P8-NEXT: xxpermdi v2, v2, vs0, 1 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_none_v8i16: ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: lxv v2, 0(r4) ; CHECK-BE-P9-NEXT: mtfprwz f0, r3 -; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P9-NEXT: xxpermdi v2, v2, vs0, 1 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_none_v8i16: ; CHECK-AIX-64-P8: # %bb.0: # %entry ; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 ; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4 -; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P8-NEXT: xxpermdi v2, v2, vs0, 1 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_none_v8i16: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) ; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 -; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P9-NEXT: xxpermdi v2, v2, vs0, 1 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_none_v8i16: @@ -538,52 +514,43 @@ define <2 x i64> @test_v16i8_v4i32(i8 %arg1, i32 %arg) { ; CHECK-LE-P8-LABEL: test_v16i8_v4i32: ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: mtfprd f0, r3 -; CHECK-LE-P8-NEXT: mtfprd f1, r4 -; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 -; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P8-NEXT: mtfprwz f1, r4 +; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v16i8_v4i32: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: mtfprd f0, r3 -; CHECK-LE-P9-NEXT: mtvsrws vs1, r4 -; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P9-NEXT: mtfprwz f1, r4 +; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v16i8_v4i32: ; CHECK-BE-P8: # %bb.0: # %entry -; CHECK-BE-P8-NEXT: sldi r3, r3, 56 -; CHECK-BE-P8-NEXT: mtfprd f0, r3 -; CHECK-BE-P8-NEXT: sldi r3, r4, 32 -; 
CHECK-BE-P8-NEXT: mtfprd f1, r3 -; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: mtfprwz f1, r4 +; CHECK-BE-P8-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v16i8_v4i32: ; CHECK-BE-P9: # %bb.0: # %entry -; CHECK-BE-P9-NEXT: sldi r3, r3, 56 -; CHECK-BE-P9-NEXT: mtvsrws vs1, r4 -; CHECK-BE-P9-NEXT: mtfprd f0, r3 -; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: mtfprwz f1, r4 +; CHECK-BE-P9-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v16i8_v4i32: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56 -; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 -; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 32 -; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3 -; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v16i8_v4i32: ; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56 -; CHECK-AIX-64-P9-NEXT: mtvsrws vs1, r4 -; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3 -; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P9-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v16i8_v4i32: @@ -618,52 +585,43 @@ define <2 x i64> @test_v4i32_v16i8(i8 %arg1, i32 %arg) { ; CHECK-LE-P8-LABEL: test_v4i32_v16i8: ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: mtfprd f0, r3 -; CHECK-LE-P8-NEXT: mtfprd f1, r4 -; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 -; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, vs1 +; CHECK-LE-P8-NEXT: mtfprwz f1, r4 +; CHECK-LE-P8-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v4i32_v16i8: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: 
mtfprd f0, r3 -; CHECK-LE-P9-NEXT: mtvsrws vs1, r4 -; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, vs1 +; CHECK-LE-P9-NEXT: mtfprwz f1, r4 +; CHECK-LE-P9-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v4i32_v16i8: ; CHECK-BE-P8: # %bb.0: # %entry -; CHECK-BE-P8-NEXT: sldi r3, r3, 56 -; CHECK-BE-P8-NEXT: mtfprd f0, r3 -; CHECK-BE-P8-NEXT: sldi r3, r4, 32 -; CHECK-BE-P8-NEXT: mtfprd f1, r3 -; CHECK-BE-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: mtfprwz f1, r4 +; CHECK-BE-P8-NEXT: xxmrgld v2, vs1, vs0 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v4i32_v16i8: ; CHECK-BE-P9: # %bb.0: # %entry -; CHECK-BE-P9-NEXT: sldi r3, r3, 56 -; CHECK-BE-P9-NEXT: mtvsrws vs1, r4 -; CHECK-BE-P9-NEXT: mtfprd f0, r3 -; CHECK-BE-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: mtfprwz f1, r4 +; CHECK-BE-P9-NEXT: xxmrgld v2, vs1, vs0 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v4i32_v16i8: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56 -; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 -; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 32 -; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3 -; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrgld v2, vs1, vs0 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v4i32_v16i8: ; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56 -; CHECK-AIX-64-P9-NEXT: mtvsrws vs1, r4 -; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3 -; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P9-NEXT: xxmrgld v2, vs1, vs0 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v4i32_v16i8: @@ -700,42 +658,42 @@ define <2 x i64> @test_none_v4i32(i32 %arg1, ptr nocapture noundef readonly %b) ; 
CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-LE-P8-NEXT: xxswapd v2, vs0 ; CHECK-LE-P8-NEXT: mtfprwz f0, r3 -; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P8-NEXT: xxpermdi v2, vs0, v2, 1 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_none_v4i32: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: lxv v2, 0(r4) ; CHECK-LE-P9-NEXT: mtfprwz f0, r3 -; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P9-NEXT: xxpermdi v2, vs0, v2, 1 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_none_v4i32: ; CHECK-BE-P8: # %bb.0: # %entry ; CHECK-BE-P8-NEXT: mtfprwz f0, r3 ; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4 -; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P8-NEXT: xxpermdi v2, v2, vs0, 1 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_none_v4i32: ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: lxv v2, 0(r4) ; CHECK-BE-P9-NEXT: mtfprwz f0, r3 -; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P9-NEXT: xxpermdi v2, v2, vs0, 1 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_none_v4i32: ; CHECK-AIX-64-P8: # %bb.0: # %entry ; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 ; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4 -; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P8-NEXT: xxpermdi v2, v2, vs0, 1 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_none_v4i32: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) ; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 -; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P9-NEXT: xxpermdi v2, v2, vs0, 1 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_none_v4i32: @@ -768,42 +726,42 @@ define <2 x i64> @test_v4i32_none(i32 %arg1, ptr nocapture noundef readonly %b) ; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-LE-P8-NEXT: xxswapd v2, vs0 ; CHECK-LE-P8-NEXT: mtfprwz f0, r3 -; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P8-NEXT: xxpermdi v2, v2, vs0, 2 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v4i32_none: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: 
lxv v2, 0(r4) ; CHECK-LE-P9-NEXT: mtfprwz f0, r3 -; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P9-NEXT: xxpermdi v2, v2, vs0, 2 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v4i32_none: ; CHECK-BE-P8: # %bb.0: # %entry ; CHECK-BE-P8-NEXT: mtfprwz f0, r3 ; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4 -; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-BE-P8-NEXT: xxpermdi v2, vs0, v2, 2 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v4i32_none: ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: lxv v2, 0(r4) ; CHECK-BE-P9-NEXT: mtfprwz f0, r3 -; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-BE-P9-NEXT: xxpermdi v2, vs0, v2, 2 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v4i32_none: ; CHECK-AIX-64-P8: # %bb.0: # %entry ; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 ; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4 -; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-64-P8-NEXT: xxpermdi v2, vs0, v2, 2 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v4i32_none: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) ; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 -; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-64-P9-NEXT: xxpermdi v2, vs0, v2, 2 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v4i32_none: @@ -835,18 +793,14 @@ define <2 x i64> @test_v16i8_v2i64(i8 %arg1, i64 %arg) { ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: mtfprd f0, r3 ; CHECK-LE-P8-NEXT: mtfprd f1, r4 -; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P8-NEXT: xxswapd v2, vs1 -; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v16i8_v2i64: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: mtfprd f0, r3 ; CHECK-LE-P9-NEXT: mtfprd f1, r4 -; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P9-NEXT: xxswapd v2, vs1 -; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v16i8_v2i64: 
@@ -923,18 +877,14 @@ define <2 x i64> @test_v2i64_v16i8(i8 %arg1, i64 %arg) { ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: mtfprd f0, r3 ; CHECK-LE-P8-NEXT: mtfprd f1, r4 -; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P8-NEXT: xxswapd v2, vs1 -; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P8-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v2i64_v16i8: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: mtfprd f0, r3 ; CHECK-LE-P9-NEXT: mtfprd f1, r4 -; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P9-NEXT: xxswapd v2, vs1 -; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P9-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v2i64_v16i8: @@ -942,14 +892,14 @@ define <2 x i64> @test_v2i64_v16i8(i8 %arg1, i64 %arg) { ; CHECK-BE-P8-NEXT: mtfprd f0, r4 ; CHECK-BE-P8-NEXT: xxspltd v2, vs0, 0 ; CHECK-BE-P8-NEXT: mtfprwz f0, r3 -; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P8-NEXT: xxmrgld v2, v2, vs0 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v2i64_v16i8: ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: mtfprwz f0, r3 ; CHECK-BE-P9-NEXT: mtvsrdd v2, r4, r4 -; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P9-NEXT: xxmrgld v2, v2, vs0 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v2i64_v16i8: @@ -957,14 +907,14 @@ define <2 x i64> @test_v2i64_v16i8(i8 %arg1, i64 %arg) { ; CHECK-AIX-64-P8-NEXT: mtfprd f0, r4 ; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs0 ; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 -; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P8-NEXT: xxmrgld v2, v2, vs0 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v2i64_v16i8: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 ; CHECK-AIX-64-P9-NEXT: mtvsrdd v2, r4, r4 -; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P9-NEXT: xxmrgld v2, v2, vs0 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v2i64_v16i8: @@ -1159,42 +1109,42 @@ define <2 
x i64> @test_v8i16_v8i16(i16 %arg1, i16 %arg) { ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: mtfprd f0, r3 ; CHECK-LE-P8-NEXT: mtfprd f1, r4 -; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v8i16_v8i16: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: mtfprd f0, r3 ; CHECK-LE-P9-NEXT: mtfprd f1, r4 -; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v8i16_v8i16: ; CHECK-BE-P8: # %bb.0: # %entry ; CHECK-BE-P8-NEXT: mtfprwz f0, r3 ; CHECK-BE-P8-NEXT: mtfprwz f1, r4 -; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v8i16_v8i16: ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: mtfprwz f0, r3 ; CHECK-BE-P9-NEXT: mtfprwz f1, r4 -; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16: ; CHECK-AIX-64-P8: # %bb.0: # %entry ; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 ; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4 -; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 ; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4 -; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16: @@ -1229,52 +1179,43 @@ define <2 x i64> @test_v8i16_v4i32(i16 %arg1, i32 %arg) { ; CHECK-LE-P8-LABEL: test_v8i16_v4i32: ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: mtfprd f0, r3 -; CHECK-LE-P8-NEXT: mtfprd f1, r4 -; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 -; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P8-NEXT: mtfprwz f1, r4 +; 
CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v8i16_v4i32: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: mtfprd f0, r3 -; CHECK-LE-P9-NEXT: mtvsrws vs1, r4 -; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P9-NEXT: mtfprwz f1, r4 +; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v8i16_v4i32: ; CHECK-BE-P8: # %bb.0: # %entry -; CHECK-BE-P8-NEXT: sldi r3, r3, 48 -; CHECK-BE-P8-NEXT: mtfprd f0, r3 -; CHECK-BE-P8-NEXT: sldi r3, r4, 32 -; CHECK-BE-P8-NEXT: mtfprd f1, r3 -; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: mtfprwz f1, r4 +; CHECK-BE-P8-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v8i16_v4i32: ; CHECK-BE-P9: # %bb.0: # %entry -; CHECK-BE-P9-NEXT: sldi r3, r3, 48 -; CHECK-BE-P9-NEXT: mtvsrws vs1, r4 -; CHECK-BE-P9-NEXT: mtfprd f0, r3 -; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: mtfprwz f1, r4 +; CHECK-BE-P9-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 -; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 -; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 32 -; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3 -; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32: ; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48 -; CHECK-AIX-64-P9-NEXT: mtvsrws vs1, r4 -; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3 -; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P9-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-AIX-64-P9-NEXT: blr ; ; 
CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32: @@ -1310,18 +1251,14 @@ define <2 x i64> @test_v8i16_v2i64(i16 %arg1, i64 %arg) { ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: mtfprd f0, r3 ; CHECK-LE-P8-NEXT: mtfprd f1, r4 -; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P8-NEXT: xxswapd v2, vs1 -; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v8i16_v2i64: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: mtfprd f0, r3 ; CHECK-LE-P9-NEXT: mtfprd f1, r4 -; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P9-NEXT: xxswapd v2, vs1 -; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v8i16_v2i64: @@ -1398,42 +1335,42 @@ define <2 x i64> @test_v4i32_v4i32(i32 %arg1, i32 %arg) { ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: mtfprwz f0, r3 ; CHECK-LE-P8-NEXT: mtfprwz f1, r4 -; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v4i32_v4i32: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: mtfprwz f0, r3 ; CHECK-LE-P9-NEXT: mtfprwz f1, r4 -; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v4i32_v4i32: ; CHECK-BE-P8: # %bb.0: # %entry ; CHECK-BE-P8-NEXT: mtfprwz f0, r3 ; CHECK-BE-P8-NEXT: mtfprwz f1, r4 -; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v4i32_v4i32: ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: mtfprwz f0, r3 ; CHECK-BE-P9-NEXT: mtfprwz f1, r4 -; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32: ; CHECK-AIX-64-P8: # %bb.0: # %entry ; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 ; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4 -; 
CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 ; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4 -; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32: @@ -1467,53 +1404,44 @@ entry: define <2 x i64> @test_v4i32_v8i16(i32 %arg1, i16 %arg) { ; CHECK-LE-P8-LABEL: test_v4i32_v8i16: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprwz f0, r3 ; CHECK-LE-P8-NEXT: mtfprd f1, r4 -; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 -; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v4i32_v8i16: ; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprwz f0, r3 ; CHECK-LE-P9-NEXT: mtfprd f1, r4 -; CHECK-LE-P9-NEXT: mtvsrws vs0, r3 -; CHECK-LE-P9-NEXT: xxswapd vs1, vs1 -; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v4i32_v8i16: ; CHECK-BE-P8: # %bb.0: # %entry -; CHECK-BE-P8-NEXT: sldi r3, r3, 32 -; CHECK-BE-P8-NEXT: mtfprd f0, r3 -; CHECK-BE-P8-NEXT: sldi r3, r4, 48 -; CHECK-BE-P8-NEXT: mtfprd f1, r3 -; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: mtfprwz f1, r4 +; CHECK-BE-P8-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v4i32_v8i16: ; CHECK-BE-P9: # %bb.0: # %entry -; CHECK-BE-P9-NEXT: mtvsrws vs0, r3 -; CHECK-BE-P9-NEXT: sldi r3, r4, 48 -; CHECK-BE-P9-NEXT: mtfprd f1, r3 -; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: mtfprwz f1, r4 +; CHECK-BE-P9-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-BE-P9-NEXT: blr ; ; 
CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 32 -; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 -; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48 -; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3 -; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16: ; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: mtvsrws vs0, r3 -; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48 -; CHECK-AIX-64-P9-NEXT: mtfprd f1, r3 -; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P9-NEXT: xxmrgld v2, vs0, vs1 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16: @@ -1547,19 +1475,16 @@ entry: define <2 x i64> @test_v4i32_v2i64(i32 %arg1, i64 %arg) { ; CHECK-LE-P8-LABEL: test_v4i32_v2i64: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprwz f0, r3 ; CHECK-LE-P8-NEXT: mtfprd f1, r4 -; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P8-NEXT: xxswapd v2, vs1 -; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v4i32_v2i64: ; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprwz f0, r3 ; CHECK-LE-P9-NEXT: mtfprd f1, r4 -; CHECK-LE-P9-NEXT: mtvsrws vs0, r3 -; CHECK-LE-P9-NEXT: xxswapd v2, vs1 -; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v4i32_v2i64: @@ -1730,18 +1655,15 @@ define <2 x i64> @test_v2i64_v4i32(i64 %arg1, i32 %arg) { ; CHECK-LE-P8-LABEL: test_v2i64_v4i32: ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: mtfprd f0, r3 -; CHECK-LE-P8-NEXT: xxswapd v2, vs0 -; CHECK-LE-P8-NEXT: mtfprd f0, r4 -; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 
-; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P8-NEXT: mtfprwz f1, r4 +; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v2i64_v4i32: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: mtfprd f0, r3 -; CHECK-LE-P9-NEXT: xxswapd v2, vs0 -; CHECK-LE-P9-NEXT: mtvsrws vs0, r4 -; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P9-NEXT: mtfprwz f1, r4 +; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v2i64_v4i32: @@ -1749,14 +1671,14 @@ define <2 x i64> @test_v2i64_v4i32(i64 %arg1, i32 %arg) { ; CHECK-BE-P8-NEXT: mtfprd f0, r3 ; CHECK-BE-P8-NEXT: xxspltd v2, vs0, 0 ; CHECK-BE-P8-NEXT: mtfprwz f0, r4 -; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P8-NEXT: xxmrgld v2, v2, vs0 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v2i64_v4i32: ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: mtfprwz f0, r4 ; CHECK-BE-P9-NEXT: mtvsrdd v2, r3, r3 -; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P9-NEXT: xxmrgld v2, v2, vs0 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v2i64_v4i32: @@ -1764,14 +1686,14 @@ define <2 x i64> @test_v2i64_v4i32(i64 %arg1, i32 %arg) { ; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 ; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs0 ; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r4 -; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P8-NEXT: xxmrgld v2, v2, vs0 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v2i64_v4i32: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4 ; CHECK-AIX-64-P9-NEXT: mtvsrdd v2, r3, r3 -; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P9-NEXT: xxmrgld v2, v2, vs0 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32: @@ -1813,19 +1735,15 @@ define <2 x i64> @test_v2i64_v8i16(i64 %arg1, i16 %arg) { ; CHECK-LE-P8-LABEL: test_v2i64_v8i16: ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: mtfprd f0, r3 -; CHECK-LE-P8-NEXT: xxswapd v2, vs0 -; CHECK-LE-P8-NEXT: 
mtfprd f0, r4 -; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v2i64_v8i16: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: mtfprd f0, r3 -; CHECK-LE-P9-NEXT: xxswapd v2, vs0 -; CHECK-LE-P9-NEXT: mtfprd f0, r4 -; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P9-NEXT: mtfprd f1, r4 +; CHECK-LE-P9-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v2i64_v8i16: @@ -1833,14 +1751,14 @@ define <2 x i64> @test_v2i64_v8i16(i64 %arg1, i16 %arg) { ; CHECK-BE-P8-NEXT: mtfprd f0, r3 ; CHECK-BE-P8-NEXT: xxspltd v2, vs0, 0 ; CHECK-BE-P8-NEXT: mtfprwz f0, r4 -; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P8-NEXT: xxmrgld v2, v2, vs0 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v2i64_v8i16: ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: mtfprwz f0, r4 ; CHECK-BE-P9-NEXT: mtvsrdd v2, r3, r3 -; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P9-NEXT: xxmrgld v2, v2, vs0 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v2i64_v8i16: @@ -1848,14 +1766,14 @@ define <2 x i64> @test_v2i64_v8i16(i64 %arg1, i16 %arg) { ; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 ; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs0 ; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r4 -; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P8-NEXT: xxmrgld v2, v2, vs0 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v2i64_v8i16: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4 ; CHECK-AIX-64-P9-NEXT: mtvsrdd v2, r3, r3 -; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P9-NEXT: xxmrgld v2, v2, vs0 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16: diff --git a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll index 
c8e7b20e4b8c3..402a4f34e62b2 100644 --- a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll +++ b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll @@ -28,15 +28,11 @@ define void @test_none_v8i16(ptr %a) { ; CHECK-LE-P8-LABEL: test_none_v8i16: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI0_0@toc@ha -; CHECK-LE-P8-NEXT: lxsdx v4, 0, r3 -; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI0_0@toc@l -; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-LE-P8-NEXT: lhz r4, 0(r3) -; CHECK-LE-P8-NEXT: mtvsrd v3, r4 -; CHECK-LE-P8-NEXT: xxswapd v2, vs0 -; CHECK-LE-P8-NEXT: vperm v2, v3, v4, v2 -; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: lfdx f1, 0, r3 +; CHECK-LE-P8-NEXT: mtfprd f0, r4 +; CHECK-LE-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 ; CHECK-LE-P8-NEXT: stfdx f0, 0, r3 ; CHECK-LE-P8-NEXT: blr ; @@ -44,11 +40,8 @@ define void @test_none_v8i16(ptr %a) { ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: lxsihzx f0, 0, r3 ; CHECK-LE-P9-NEXT: lfd f1, 0(r3) -; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-LE-P9-NEXT: lxv vs2, 0(r3) -; CHECK-LE-P9-NEXT: xxperm vs1, vs0, vs2 -; CHECK-LE-P9-NEXT: xxswapd vs0, vs1 +; CHECK-LE-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 ; CHECK-LE-P9-NEXT: stfd f0, 0(r3) ; CHECK-LE-P9-NEXT: blr ; @@ -131,11 +124,15 @@ define void @test_v8i16_none(ptr %a) { ; CHECK-LE-P8-LABEL: test_v8i16_none: ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-LE-P8-NEXT: lhz r3, 0(r3) -; CHECK-LE-P8-NEXT: mtfprd f1, r3 -; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P8-NEXT: xxmrglw vs0, vs0, vs1 -; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: mtvsrd v4, r3 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 
+; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 ; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-LE-P8-NEXT: blr ; @@ -143,59 +140,76 @@ define void @test_v8i16_none(ptr %a) { ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: lxsihzx f0, 0, r3 ; CHECK-LE-P9-NEXT: lxv vs1, 0(r3) -; CHECK-LE-P9-NEXT: xxmrglw vs0, vs1, vs0 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-LE-P9-NEXT: lxv vs2, 0(r3) +; CHECK-LE-P9-NEXT: xxperm vs0, vs1, vs2 ; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v8i16_none: ; CHECK-BE-P8: # %bb.0: # %entry ; CHECK-BE-P8-NEXT: lhz r4, 0(r3) -; CHECK-BE-P8-NEXT: lxvw4x vs1, 0, r3 -; CHECK-BE-P8-NEXT: mtfprwz f0, r4 -; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1 -; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-BE-P8-NEXT: mtvsrwz v2, r4 +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v8i16_none: ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: lxsihzx f0, 0, r3 ; CHECK-BE-P9-NEXT: lxv vs1, 0(r3) -; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, vs1 -; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-BE-P9-NEXT: lxv vs2, 0(r3) +; CHECK-BE-P9-NEXT: xxperm vs1, vs0, vs2 +; CHECK-BE-P9-NEXT: stxv vs1, 0(r3) ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v8i16_none: ; CHECK-AIX-64-P8: # %bb.0: # %entry ; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3) -; CHECK-AIX-64-P8-NEXT: lxvw4x vs1, 0, r3 -; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r4 -; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1 -; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-64-P8-NEXT: 
ld r3, L..C0(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v8i16_none: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lxsihzx f0, 0, r3 ; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) -; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, vs1 -; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: ld r3, L..C0(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxv vs2, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxperm vs1, vs0, vs2 +; CHECK-AIX-64-P9-NEXT: stxv vs1, 0(r3) ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v8i16_none: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) -; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 -; CHECK-AIX-32-P8-NEXT: mtfprwz f0, r4 -; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1 -; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C0(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v8i16_none: ; CHECK-AIX-32-P9: # %bb.0: # %entry ; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3 ; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3) -; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1 -; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C0(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxv vs2, 0(r3) +; CHECK-AIX-32-P9-NEXT: xxperm vs1, vs0, vs2 +; CHECK-AIX-32-P9-NEXT: stxv vs1, 0(r3) ; CHECK-AIX-32-P9-NEXT: blr entry: %0 = load <2 x i8>, ptr undef, align 1 @@ -264,7 +278,7 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) { ; CHECK-AIX-64-P8-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-AIX-64-P8-NEXT: mffprwz r4, f0 ; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r4 -; 
CHECK-AIX-64-P8-NEXT: ld r4, L..C0(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r4, L..C1(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r4 ; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4 ; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 @@ -275,7 +289,7 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) { ; CHECK-AIX-64-P9-NEXT: li r4, 0 ; CHECK-AIX-64-P9-NEXT: vextuwlx r4, r4, v2 ; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4 -; CHECK-AIX-64-P9-NEXT: ld r4, L..C0(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: ld r4, L..C1(r2) # %const.0 ; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r4) ; CHECK-AIX-64-P9-NEXT: xxperm vs0, v2, vs1 ; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) @@ -286,7 +300,7 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) { ; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16 ; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r4 ; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 -; CHECK-AIX-32-P8-NEXT: lwz r4, L..C0(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lwz r4, L..C1(r2) # %const.0 ; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 ; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 ; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 @@ -297,7 +311,7 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) { ; CHECK-AIX-32-P9-NEXT: addi r4, r1, -16 ; CHECK-AIX-32-P9-NEXT: stxv v2, -16(r1) ; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r4 -; CHECK-AIX-32-P9-NEXT: lwz r4, L..C0(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lwz r4, L..C1(r2) # %const.0 ; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r4) ; CHECK-AIX-32-P9-NEXT: xxperm vs0, v2, vs1 ; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) @@ -369,7 +383,7 @@ define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) { ; CHECK-AIX-64-P8-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-AIX-64-P8-NEXT: mffprwz r4, f0 ; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r4 -; CHECK-AIX-64-P8-NEXT: ld r4, L..C1(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r4, L..C2(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r4 ; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4 ; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 @@ -380,7 +394,7 @@ define void 
@test_v4i32_none(<2 x i32> %vec, ptr %ptr1) { ; CHECK-AIX-64-P9-NEXT: li r4, 0 ; CHECK-AIX-64-P9-NEXT: vextuwlx r4, r4, v2 ; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4 -; CHECK-AIX-64-P9-NEXT: ld r4, L..C1(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: ld r4, L..C2(r2) # %const.0 ; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r4) ; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) @@ -391,7 +405,7 @@ define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) { ; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16 ; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r4 ; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 -; CHECK-AIX-32-P8-NEXT: lwz r4, L..C1(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lwz r4, L..C2(r2) # %const.0 ; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 ; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4 ; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 @@ -402,7 +416,7 @@ define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) { ; CHECK-AIX-32-P9-NEXT: addi r4, r1, -16 ; CHECK-AIX-32-P9-NEXT: stxv v2, -16(r1) ; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r4 -; CHECK-AIX-32-P9-NEXT: lwz r4, L..C1(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lwz r4, L..C2(r2) # %const.0 ; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r4) ; CHECK-AIX-32-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) @@ -474,12 +488,12 @@ define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_ad ; ; CHECK-AIX-64-P8-LABEL: test_none_v2i64: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: ld r5, L..C2(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r5, L..C3(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r4 ; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r5 ; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 ; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r3 -; CHECK-AIX-64-P8-NEXT: ld r3, L..C3(r2) # %const.1 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C4(r2) # %const.1 ; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3 ; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4 ; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 @@ -487,7 +501,7 @@ define void 
@test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_ad ; CHECK-AIX-64-P9-LABEL: test_none_v2i64: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3) -; CHECK-AIX-64-P9-NEXT: ld r3, L..C2(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.0 ; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4 ; CHECK-AIX-64-P9-NEXT: xxinsertw v2, vs1, 0 ; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) @@ -497,12 +511,12 @@ define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_ad ; CHECK-AIX-32-P8-LABEL: test_none_v2i64: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r3 -; CHECK-AIX-32-P8-NEXT: lwz r3, L..C2(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C3(r2) # %const.0 ; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1) ; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 ; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 ; CHECK-AIX-32-P8-NEXT: lxvw4x v5, 0, r3 -; CHECK-AIX-32-P8-NEXT: lwz r3, L..C3(r2) # %const.1 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C4(r2) # %const.1 ; CHECK-AIX-32-P8-NEXT: vperm v2, v5, v2, v4 ; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 ; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 @@ -511,7 +525,7 @@ define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_ad ; CHECK-AIX-32-P9-LABEL: test_none_v2i64: ; CHECK-AIX-32-P9: # %bb.0: # %entry ; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-32-P9-NEXT: lwz r3, L..C2(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C3(r2) # %const.0 ; CHECK-AIX-32-P9-NEXT: mtfprwz f1, r4 ; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs1, 0 ; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3) @@ -595,7 +609,7 @@ define void @test_v8i16_v8i16(ptr %a) { ; CHECK-LE-P8-NEXT: lhz r3, 0(r3) ; CHECK-LE-P8-NEXT: mtfprd f0, r4 ; CHECK-LE-P8-NEXT: mtfprd f1, r3 -; CHECK-LE-P8-NEXT: xxmrglw vs0, vs1, vs0 +; CHECK-LE-P8-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 ; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-LE-P8-NEXT: blr @@ -604,7 +618,7 @@ define void 
@test_v8i16_v8i16(ptr %a) { ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: lxsihzx f0, 0, r3 ; CHECK-LE-P9-NEXT: lxsihzx f1, 0, r3 -; CHECK-LE-P9-NEXT: xxmrglw vs0, vs1, vs0 +; CHECK-LE-P9-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) ; CHECK-LE-P9-NEXT: blr ; @@ -614,7 +628,7 @@ define void @test_v8i16_v8i16(ptr %a) { ; CHECK-BE-P8-NEXT: lhz r3, 0(r3) ; CHECK-BE-P8-NEXT: mtfprwz f0, r4 ; CHECK-BE-P8-NEXT: mtfprwz f1, r3 -; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P8-NEXT: xxmrglw vs0, vs0, vs1 ; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 ; CHECK-BE-P8-NEXT: blr ; @@ -622,7 +636,7 @@ define void @test_v8i16_v8i16(ptr %a) { ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: lxsihzx f0, 0, r3 ; CHECK-BE-P9-NEXT: lxsihzx f1, 0, r3 -; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P9-NEXT: xxmrglw vs0, vs0, vs1 ; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) ; CHECK-BE-P9-NEXT: blr ; @@ -632,7 +646,7 @@ define void @test_v8i16_v8i16(ptr %a) { ; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3) ; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r4 ; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r3 -; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: xxmrglw vs0, vs0, vs1 ; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 ; CHECK-AIX-64-P8-NEXT: blr ; @@ -640,7 +654,7 @@ define void @test_v8i16_v8i16(ptr %a) { ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lxsihzx f0, 0, r3 ; CHECK-AIX-64-P9-NEXT: lxsihzx f1, 0, r3 -; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: xxmrglw vs0, vs0, vs1 ; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) ; CHECK-AIX-64-P9-NEXT: blr ; @@ -650,7 +664,7 @@ define void @test_v8i16_v8i16(ptr %a) { ; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) ; CHECK-AIX-32-P8-NEXT: mtfprwz f0, r4 ; CHECK-AIX-32-P8-NEXT: mtfprwz f1, r3 -; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: xxmrglw vs0, vs0, vs1 ; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 ; CHECK-AIX-32-P8-NEXT: blr ; @@ -658,7 +672,7 @@ define void 
@test_v8i16_v8i16(ptr %a) { ; CHECK-AIX-32-P9: # %bb.0: # %entry ; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3 ; CHECK-AIX-32-P9-NEXT: lxsihzx f1, 0, r3 -; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: xxmrglw vs0, vs0, vs1 ; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) ; CHECK-AIX-32-P9-NEXT: blr entry: @@ -681,85 +695,82 @@ define void @test_v8i16_v4i32(ptr %a) { ; CHECK-LE-P8-NEXT: lhz r4, 0(r3) ; CHECK-LE-P8-NEXT: lfiwzx f1, 0, r3 ; CHECK-LE-P8-NEXT: mtfprd f0, r4 -; CHECK-LE-P8-NEXT: xxswapd vs1, f1 -; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P8-NEXT: xxmrglw vs0, vs1, vs0 +; CHECK-LE-P8-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 ; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v8i16_v4i32: ; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 -; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-LE-P9-NEXT: xxswapd vs0, f0 -; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 -; CHECK-LE-P9-NEXT: xxmrglw vs0, vs0, v2 +; CHECK-LE-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-LE-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-LE-P9-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v8i16_v4i32: ; CHECK-BE-P8: # %bb.0: # %entry -; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3 -; CHECK-BE-P8-NEXT: lhz r3, 0(r3) -; CHECK-BE-P8-NEXT: sldi r3, r3, 48 -; CHECK-BE-P8-NEXT: mtfprd f1, r3 -; CHECK-BE-P8-NEXT: xxsldwi vs0, f0, f0, 1 -; CHECK-BE-P8-NEXT: xxmrghw vs0, vs1, vs0 -; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: lhz r4, 0(r3) +; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI7_0@toc@ha +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI7_0@toc@l +; CHECK-BE-P8-NEXT: mtvsrwz v2, r4 +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v8i16_v4i32: ; CHECK-BE-P9: # %bb.0: # %entry -; CHECK-BE-P9-NEXT: lxsihzx v2, 
0, r3 -; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-BE-P9-NEXT: xxsldwi vs0, f0, f0, 1 -; CHECK-BE-P9-NEXT: vsplth v2, v2, 3 -; CHECK-BE-P9-NEXT: xxmrghw vs0, v2, vs0 -; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-BE-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI7_0@toc@ha +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI7_0@toc@l +; CHECK-BE-P9-NEXT: lxv vs2, 0(r3) +; CHECK-BE-P9-NEXT: xxperm vs1, vs0, vs2 +; CHECK-BE-P9-NEXT: stxv vs1, 0(r3) ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3) -; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 -; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3 -; CHECK-AIX-64-P8-NEXT: xxsldwi vs0, f0, f0, 1 -; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs1, vs0 -; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C5(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32: ; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 -; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-64-P9-NEXT: xxsldwi vs0, f0, f0, 1 -; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 -; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, v2, vs0 -; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-AIX-64-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C4(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxv vs2, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxperm vs1, vs0, vs2 +; CHECK-AIX-64-P9-NEXT: stxv vs1, 0(r3) ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) -; 
CHECK-AIX-32-P8-NEXT: sth r4, -16(r1) -; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 -; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 -; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1 -; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 -; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C5(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32: ; CHECK-AIX-32-P9: # %bb.0: # %entry -; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3) -; CHECK-AIX-32-P9-NEXT: sth r4, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3 -; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1 -; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-AIX-32-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C4(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxv vs2, 0(r3) +; CHECK-AIX-32-P9-NEXT: xxperm vs1, vs0, vs2 +; CHECK-AIX-32-P9-NEXT: stxv vs1, 0(r3) ; CHECK-AIX-32-P9-NEXT: blr entry: %0 = load <2 x i8>, ptr undef, align 1 @@ -780,20 +791,16 @@ define void @test_v8i16_v2i64(ptr %a) { ; CHECK-LE-P8-NEXT: lhz r4, 0(r3) ; CHECK-LE-P8-NEXT: lfdx f1, 0, r3 ; CHECK-LE-P8-NEXT: mtfprd f0, r4 -; CHECK-LE-P8-NEXT: xxswapd vs1, f1 -; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P8-NEXT: xxmrglw vs0, vs1, vs0 +; CHECK-LE-P8-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 ; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v8i16_v2i64: ; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 -; CHECK-LE-P9-NEXT: lfd f0, 0(r3) -; CHECK-LE-P9-NEXT: xxswapd vs0, f0 -; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 -; CHECK-LE-P9-NEXT: xxmrglw vs0, vs0, v2 +; CHECK-LE-P9-NEXT: lxsihzx f0, 0, 
r3 +; CHECK-LE-P9-NEXT: lfd f1, 0(r3) +; CHECK-LE-P9-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) ; CHECK-LE-P9-NEXT: blr ; @@ -838,23 +845,22 @@ define void @test_v8i16_v2i64(ptr %a) { ; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) -; CHECK-AIX-32-P8-NEXT: sth r4, -16(r1) -; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 -; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 -; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1 -; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 -; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C6(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64: ; CHECK-AIX-32-P9: # %bb.0: # %entry -; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3) -; CHECK-AIX-32-P9-NEXT: sth r4, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3 -; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1 -; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-AIX-32-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C5(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxv vs2, 0(r3) +; CHECK-AIX-32-P9-NEXT: xxperm vs1, vs0, vs2 +; CHECK-AIX-32-P9-NEXT: stxv vs1, 0(r3) ; CHECK-AIX-32-P9-NEXT: blr entry: %0 = load <2 x i8>, ptr undef, align 1 @@ -914,7 +920,7 @@ define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) { ; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32: ; CHECK-AIX-64-P8: # %bb.0: # %entry ; CHECK-AIX-64-P8-NEXT: lxsiwzx v2, 0, r3 -; CHECK-AIX-64-P8-NEXT: ld r3, L..C4(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C6(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r4 ; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3 ; 
CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4 @@ -923,7 +929,7 @@ define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) { ; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0 ; CHECK-AIX-64-P9-NEXT: lxsiwzx v2, 0, r4 ; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) ; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1 @@ -932,7 +938,7 @@ define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) { ; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 -; CHECK-AIX-32-P8-NEXT: lwz r3, L..C4(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C7(r2) # %const.0 ; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 ; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 ; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 @@ -941,7 +947,7 @@ define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) { ; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32: ; CHECK-AIX-32-P9: # %bb.0: # %entry ; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-32-P9-NEXT: lwz r3, L..C3(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C6(r2) # %const.0 ; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r4 ; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3) ; CHECK-AIX-32-P9-NEXT: xxperm v2, vs0, vs1 @@ -960,84 +966,81 @@ define void @test_v4i32_v8i16(ptr %a) { ; CHECK-LE-P8-NEXT: lhz r4, 0(r3) ; CHECK-LE-P8-NEXT: lfiwzx f1, 0, r3 ; CHECK-LE-P8-NEXT: mtfprd f0, r4 -; CHECK-LE-P8-NEXT: xxswapd vs1, f1 -; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P8-NEXT: xxmrglw vs0, vs0, vs1 +; CHECK-LE-P8-NEXT: xxmrghw vs0, vs0, vs1 ; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 ; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v4i32_v8i16: ; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 -; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-LE-P9-NEXT: xxswapd vs0, f0 -; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 -; 
CHECK-LE-P9-NEXT: xxmrglw vs0, v2, vs0 +; CHECK-LE-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-LE-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-LE-P9-NEXT: xxmrghw vs0, vs0, vs1 ; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v4i32_v8i16: ; CHECK-BE-P8: # %bb.0: # %entry -; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3 -; CHECK-BE-P8-NEXT: lhz r3, 0(r3) -; CHECK-BE-P8-NEXT: sldi r3, r3, 48 -; CHECK-BE-P8-NEXT: mtfprd f1, r3 -; CHECK-BE-P8-NEXT: xxsldwi vs0, f0, f0, 1 -; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1 -; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: lhz r4, 0(r3) +; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI10_0@toc@ha +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI10_0@toc@l +; CHECK-BE-P8-NEXT: mtvsrwz v2, r4 +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v4i32_v8i16: ; CHECK-BE-P9: # %bb.0: # %entry -; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 -; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-BE-P9-NEXT: xxsldwi vs0, f0, f0, 1 -; CHECK-BE-P9-NEXT: vsplth v2, v2, 3 -; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, v2 +; CHECK-BE-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-BE-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI10_0@toc@ha +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI10_0@toc@l +; CHECK-BE-P9-NEXT: lxv vs2, 0(r3) +; CHECK-BE-P9-NEXT: xxperm vs0, vs1, vs2 ; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3) -; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 -; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3 -; CHECK-AIX-64-P8-NEXT: xxsldwi vs0, f0, f0, 1 -; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1 -; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r3 +; 
CHECK-AIX-64-P8-NEXT: ld r3, L..C7(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16: ; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 -; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-64-P9-NEXT: xxsldwi vs0, f0, f0, 1 -; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 -; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, v2 +; CHECK-AIX-64-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-AIX-64-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C6(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxv vs2, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxperm vs0, vs1, vs2 ; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) -; CHECK-AIX-32-P8-NEXT: sth r4, -16(r1) -; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 -; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 -; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1 -; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1 -; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C8(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16: ; CHECK-AIX-32-P9: # %bb.0: # %entry -; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3) -; CHECK-AIX-32-P9-NEXT: sth r4, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3 -; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-AIX-32-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C7(r2) # %const.0 +; 
CHECK-AIX-32-P9-NEXT: lxv vs2, 0(r3) +; CHECK-AIX-32-P9-NEXT: xxperm vs0, vs1, vs2 ; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) ; CHECK-AIX-32-P9-NEXT: blr entry: @@ -1058,9 +1061,7 @@ define void @test_v4i32_v2i64(ptr %a) { ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: lfdx f0, 0, r3 ; CHECK-LE-P8-NEXT: lfiwzx f1, 0, r3 -; CHECK-LE-P8-NEXT: xxswapd vs0, f0 -; CHECK-LE-P8-NEXT: xxswapd vs1, f1 -; CHECK-LE-P8-NEXT: xxmrglw vs0, vs0, vs1 +; CHECK-LE-P8-NEXT: xxmrghw vs0, vs0, vs1 ; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 ; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-LE-P8-NEXT: blr @@ -1069,9 +1070,7 @@ define void @test_v4i32_v2i64(ptr %a) { ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: lfd f0, 0(r3) ; CHECK-LE-P9-NEXT: lfiwzx f1, 0, r3 -; CHECK-LE-P9-NEXT: xxswapd vs0, f0 -; CHECK-LE-P9-NEXT: xxswapd vs1, f1 -; CHECK-LE-P9-NEXT: xxmrglw vs0, vs0, vs1 +; CHECK-LE-P9-NEXT: xxmrghw vs0, vs0, vs1 ; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) ; CHECK-LE-P9-NEXT: blr ; @@ -1227,9 +1226,7 @@ define void @test_v2i64_v4i32(ptr %a) { ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: lfdx f0, 0, r3 ; CHECK-LE-P8-NEXT: lfiwzx f1, 0, r3 -; CHECK-LE-P8-NEXT: xxswapd vs0, f0 -; CHECK-LE-P8-NEXT: xxswapd vs1, f1 -; CHECK-LE-P8-NEXT: xxmrglw vs0, vs1, vs0 +; CHECK-LE-P8-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 ; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-LE-P8-NEXT: blr @@ -1238,9 +1235,7 @@ define void @test_v2i64_v4i32(ptr %a) { ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: lfd f0, 0(r3) ; CHECK-LE-P9-NEXT: lfiwzx f1, 0, r3 -; CHECK-LE-P9-NEXT: xxswapd vs0, f0 -; CHECK-LE-P9-NEXT: xxswapd vs1, f1 -; CHECK-LE-P9-NEXT: xxmrglw vs0, vs1, vs0 +; CHECK-LE-P9-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) ; CHECK-LE-P9-NEXT: blr ; @@ -1315,20 +1310,16 @@ define void @test_v2i64_v8i16(ptr %a) { ; CHECK-LE-P8-NEXT: lhz r4, 0(r3) ; CHECK-LE-P8-NEXT: lfdx f1, 0, r3 ; CHECK-LE-P8-NEXT: mtfprd f0, r4 -; CHECK-LE-P8-NEXT: xxswapd vs1, f1 -; 
CHECK-LE-P8-NEXT: xxswapd vs0, vs0 -; CHECK-LE-P8-NEXT: xxmrglw vs0, vs0, vs1 +; CHECK-LE-P8-NEXT: xxmrghw vs0, vs0, vs1 ; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 ; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v2i64_v8i16: ; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 -; CHECK-LE-P9-NEXT: lfd f0, 0(r3) -; CHECK-LE-P9-NEXT: xxswapd vs0, f0 -; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 -; CHECK-LE-P9-NEXT: xxmrglw vs0, v2, vs0 +; CHECK-LE-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-LE-P9-NEXT: lfd f1, 0(r3) +; CHECK-LE-P9-NEXT: xxmrghw vs0, vs0, vs1 ; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) ; CHECK-LE-P9-NEXT: blr ; @@ -1373,22 +1364,21 @@ define void @test_v2i64_v8i16(ptr %a) { ; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) -; CHECK-AIX-32-P8-NEXT: sth r4, -16(r1) -; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 -; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 -; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1 -; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1 -; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C9(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16: ; CHECK-AIX-32-P9: # %bb.0: # %entry -; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3) -; CHECK-AIX-32-P9-NEXT: sth r4, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3 -; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-AIX-32-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C8(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxv vs2, 0(r3) +; CHECK-AIX-32-P9-NEXT: xxperm vs0, vs1, vs2 ; CHECK-AIX-32-P9-NEXT: stxv vs0, 
0(r3) ; CHECK-AIX-32-P9-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll index e1aa531db449e..47ffdb4625ed3 100644 --- a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll +++ b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll @@ -241,16 +241,13 @@ entry: define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0 { ; CHECK-LE-P8-LABEL: test_none_v4i32: ; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r5 ; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI2_0@toc@ha -; CHECK-LE-P8-NEXT: mtvsrd v3, r5 ; CHECK-LE-P8-NEXT: lxsiwzx v4, 0, r3 ; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI2_0@toc@l -; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 -; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI2_1@toc@ha -; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI2_1@toc@l ; CHECK-LE-P8-NEXT: xxswapd v2, vs0 ; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 -; CHECK-LE-P8-NEXT: vperm v2, v3, v3, v2 +; CHECK-LE-P8-NEXT: vmrglh v2, v2, v2 ; CHECK-LE-P8-NEXT: xxswapd v3, vs0 ; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3 ; CHECK-LE-P8-NEXT: xxswapd vs0, v2 @@ -261,13 +258,11 @@ define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0 ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3 ; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; CHECK-LE-P9-NEXT: mtvsrd v3, r5 +; CHECK-LE-P9-NEXT: mtfprd f0, r5 ; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-LE-P9-NEXT: lxv vs0, 0(r3) -; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_1@toc@ha -; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_1@toc@l +; CHECK-LE-P9-NEXT: xxswapd v3, vs0 ; CHECK-LE-P9-NEXT: lxv v4, 0(r3) -; CHECK-LE-P9-NEXT: xxperm v3, v3, vs0 +; CHECK-LE-P9-NEXT: vmrglh v3, v3, v3 ; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 ; CHECK-LE-P9-NEXT: xxswapd vs0, v2 ; CHECK-LE-P9-NEXT: stfd f0, 0(r3) @@ -275,15 +270,13 @@ define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0 ; ; 
CHECK-BE-P8-LABEL: test_none_v4i32: ; CHECK-BE-P8: # %bb.0: # %entry -; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI2_0@toc@ha -; CHECK-BE-P8-NEXT: mtvsrwz v2, r5 -; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI2_0@toc@l -; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r4 -; CHECK-BE-P8-NEXT: vperm v2, v2, v2, v3 +; CHECK-BE-P8-NEXT: sldi r4, r5, 56 ; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r3 -; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI2_1@toc@ha -; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI2_1@toc@l +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-BE-P8-NEXT: mtvsrd v2, r4 +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI2_0@toc@l ; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-BE-P8-NEXT: vmrghh v2, v2, v2 ; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4 ; CHECK-BE-P8-NEXT: stxsdx v2, 0, r3 ; CHECK-BE-P8-NEXT: blr @@ -291,27 +284,24 @@ define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0 ; CHECK-BE-P9-LABEL: test_none_v4i32: ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-BE-P9-NEXT: sldi r3, r5, 56 +; CHECK-BE-P9-NEXT: mtvsrd v3, r3 ; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; CHECK-BE-P9-NEXT: mtvsrwz v3, r5 ; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-BE-P9-NEXT: lxv vs0, 0(r3) -; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_1@toc@ha -; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_1@toc@l +; CHECK-BE-P9-NEXT: vmrghh v3, v3, v3 ; CHECK-BE-P9-NEXT: lxv v4, 0(r3) -; CHECK-BE-P9-NEXT: xxperm v3, v3, vs0 ; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 ; CHECK-BE-P9-NEXT: stxsd v2, 0(r3) ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_none_v4i32: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: ld r4, L..C3(r2) # %const.0 -; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r5 -; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 -; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: sldi r4, r5, 56 ; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r3 -; CHECK-AIX-64-P8-NEXT: ld r3, L..C4(r2) # %const.1 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C3(r2) # %const.0 +; 
CHECK-AIX-64-P8-NEXT: mtvsrd v2, r4 ; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v2 ; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4 ; CHECK-AIX-64-P8-NEXT: stxsdx v2, 0, r3 ; CHECK-AIX-64-P8-NEXT: blr @@ -319,12 +309,11 @@ define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0 ; CHECK-AIX-64-P9-LABEL: test_none_v4i32: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: sldi r3, r5, 56 +; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3 ; CHECK-AIX-64-P9-NEXT: ld r3, L..C2(r2) # %const.0 -; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r5 -; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r3) -; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.1 +; CHECK-AIX-64-P9-NEXT: vmrghh v3, v3, v3 ; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) -; CHECK-AIX-64-P9-NEXT: xxperm v3, v3, vs0 ; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 ; CHECK-AIX-64-P9-NEXT: stxsd v2, 0(r3) ; CHECK-AIX-64-P9-NEXT: blr @@ -421,7 +410,7 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl ; CHECK-AIX-64-P8-LABEL: test_v4i32_none: ; CHECK-AIX-64-P8: # %bb.0: # %entry ; CHECK-AIX-64-P8-NEXT: lxsiwzx v2, 0, r3 -; CHECK-AIX-64-P8-NEXT: ld r3, L..C5(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C4(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: xxlxor v4, v4, v4 ; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r3 ; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 @@ -431,7 +420,7 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl ; CHECK-AIX-64-P9-LABEL: test_v4i32_none: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-64-P9-NEXT: ld r3, L..C4(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.0 ; CHECK-AIX-64-P9-NEXT: xxlxor vs2, vs2, vs2 ; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) ; CHECK-AIX-64-P9-NEXT: xxperm vs0, vs2, vs1 @@ -531,7 +520,7 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl ; CHECK-AIX-64-P8-LABEL: 
test_none_v2i64: ; CHECK-AIX-64-P8: # %bb.0: # %entry ; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r3 -; CHECK-AIX-64-P8-NEXT: ld r3, L..C6(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C5(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 ; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3 ; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4 @@ -543,7 +532,7 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl ; CHECK-AIX-64-P9-LABEL: test_none_v2i64: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3) -; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C4(r2) # %const.0 ; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r4) ; CHECK-AIX-64-P9-NEXT: xxlxor v3, v3, v3 ; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) @@ -635,7 +624,7 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) { ; CHECK-AIX-64-P8-LABEL: test_v2i64_none: ; CHECK-AIX-64-P8: # %bb.0: # %entry ; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r3 -; CHECK-AIX-64-P8-NEXT: ld r3, L..C7(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C6(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: xxlxor v4, v4, v4 ; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r3 ; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 @@ -645,7 +634,7 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) { ; CHECK-AIX-64-P9-LABEL: test_v2i64_none: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3) -; CHECK-AIX-64-P9-NEXT: ld r3, L..C6(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0 ; CHECK-AIX-64-P9-NEXT: xxlxor vs2, vs2, vs2 ; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) ; CHECK-AIX-64-P9-NEXT: xxperm vs0, vs2, vs1 @@ -739,7 +728,7 @@ define <16 x i8> @test_v8i16_v8i16(ptr %a, ptr %b) { ; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r3 ; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r4) ; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r3 -; CHECK-AIX-64-P8-NEXT: ld r3, L..C8(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C7(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3 ; CHECK-AIX-64-P8-NEXT: 
vperm v2, v2, v3, v4 ; CHECK-AIX-64-P8-NEXT: blr @@ -747,7 +736,7 @@ define <16 x i8> @test_v8i16_v8i16(ptr %a, ptr %b) { ; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lxsihzx f0, 0, r3 -; CHECK-AIX-64-P9-NEXT: ld r3, L..C7(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C6(r2) # %const.0 ; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r4 ; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) ; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1 @@ -784,78 +773,75 @@ define <16 x i8> @test_v8i16_v4i32(ptr %a, ptr %b) local_unnamed_addr { ; CHECK-LE-P8-LABEL: test_v8i16_v4i32: ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: lhz r3, 0(r3) -; CHECK-LE-P8-NEXT: mtfprd f0, r3 -; CHECK-LE-P8-NEXT: xxswapd v2, vs0 -; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r4 -; CHECK-LE-P8-NEXT: xxswapd v3, f0 -; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-LE-P8-NEXT: mtvsrd v2, r3 +; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v8i16_v4i32: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 -; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r4 -; CHECK-LE-P9-NEXT: xxswapd v3, f0 -; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 -; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v8i16_v4i32: ; CHECK-BE-P8: # %bb.0: # %entry ; CHECK-BE-P8-NEXT: lhz r3, 0(r3) -; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r4 -; CHECK-BE-P8-NEXT: sldi r3, r3, 48 -; CHECK-BE-P8-NEXT: mtvsrd v3, r3 -; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1 -; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-BE-P8-NEXT: mtvsrwz v2, r3 +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI7_0@toc@ha +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI7_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v8i16_v4i32: ; CHECK-BE-P9: # 
%bb.0: # %entry -; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 -; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r4 -; CHECK-BE-P9-NEXT: xxsldwi v3, f0, f0, 1 -; CHECK-BE-P9-NEXT: vsplth v2, v2, 3 -; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI7_0@toc@ha +; CHECK-BE-P9-NEXT: lxsiwzx v2, 0, r4 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI7_0@toc@l +; CHECK-BE-P9-NEXT: lxv vs1, 0(r3) +; CHECK-BE-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32: ; CHECK-AIX-64-P8: # %bb.0: # %entry ; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3) -; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r4 -; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 -; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 -; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1 -; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C8(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32: ; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 -; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r4 -; CHECK-AIX-64-P9-NEXT: xxsldwi v3, f0, f0, 1 -; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 -; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C7(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxsiwzx v2, 0, r4 +; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) -; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 -; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r4 -; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 -; CHECK-AIX-32-P8-NEXT: xxspltw v2, vs0, 1 -; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, 
v2 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C8(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32: ; CHECK-AIX-32-P9: # %bb.0: # %entry -; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3) -; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxvwsx v3, 0, r4 -; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C7(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3) +; CHECK-AIX-32-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-AIX-32-P9-NEXT: blr entry: %0 = load <2 x i8>, ptr %a @@ -874,20 +860,16 @@ define <16 x i8> @test_v8i16_v2i64(ptr %a, ptr %b) local_unnamed_addr { ; CHECK-LE-P8-LABEL: test_v8i16_v2i64: ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: lhz r3, 0(r3) -; CHECK-LE-P8-NEXT: mtfprd f0, r3 -; CHECK-LE-P8-NEXT: xxswapd v2, vs0 -; CHECK-LE-P8-NEXT: lfdx f0, 0, r4 -; CHECK-LE-P8-NEXT: xxswapd v3, f0 -; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-LE-P8-NEXT: mtvsrd v2, r3 +; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v8i16_v2i64: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 -; CHECK-LE-P9-NEXT: lfd f0, 0(r4) -; CHECK-LE-P9-NEXT: xxswapd v3, f0 -; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 -; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v8i16_v2i64: @@ -927,21 +909,20 @@ define <16 x i8> @test_v8i16_v2i64(ptr %a, ptr %b) local_unnamed_addr { ; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) -; CHECK-AIX-32-P8-NEXT: sth r3, 
-16(r1) -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 -; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r4 -; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 -; CHECK-AIX-32-P8-NEXT: xxspltw v2, vs0, 1 -; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C9(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64: ; CHECK-AIX-32-P9: # %bb.0: # %entry -; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3) -; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxvwsx v3, 0, r4 -; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C8(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3) +; CHECK-AIX-32-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-AIX-32-P9-NEXT: blr entry: %0 = load <2 x i8>, ptr %a @@ -1045,7 +1026,7 @@ define void @test_v4i32_v4i32(ptr nocapture readonly %ptr1, ptr nocapture readon ; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 -; CHECK-AIX-32-P8-NEXT: lwz r3, L..C8(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C10(r2) # %const.0 ; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 ; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 ; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4 @@ -1057,7 +1038,7 @@ define void @test_v4i32_v4i32(ptr nocapture readonly %ptr1, ptr nocapture readon ; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32: ; CHECK-AIX-32-P9: # %bb.0: # %entry ; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 -; CHECK-AIX-32-P9-NEXT: lwz r3, L..C7(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C9(r2) # %const.0 ; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r4 ; CHECK-AIX-32-P9-NEXT: xxlxor v3, v3, v3 ; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3) @@ -1078,78 +1059,75 @@ define 
<16 x i8> @test_v4i32_v8i16(ptr %a, ptr %b) local_unnamed_addr { ; CHECK-LE-P8-LABEL: test_v4i32_v8i16: ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: lhz r3, 0(r3) -; CHECK-LE-P8-NEXT: mtfprd f0, r3 -; CHECK-LE-P8-NEXT: xxswapd v2, vs0 -; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r4 -; CHECK-LE-P8-NEXT: xxswapd v3, f0 -; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-LE-P8-NEXT: mtvsrd v2, r3 +; CHECK-LE-P8-NEXT: vmrghh v2, v2, v3 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v4i32_v8i16: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 -; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r4 -; CHECK-LE-P9-NEXT: xxswapd v3, f0 -; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 -; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-LE-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v4i32_v8i16: ; CHECK-BE-P8: # %bb.0: # %entry ; CHECK-BE-P8-NEXT: lhz r3, 0(r3) -; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r4 -; CHECK-BE-P8-NEXT: sldi r3, r3, 48 -; CHECK-BE-P8-NEXT: mtvsrd v3, r3 -; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1 -; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-BE-P8-NEXT: mtvsrwz v2, r3 +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI10_0@toc@ha +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI10_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v4i32_v8i16: ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI10_0@toc@ha ; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r4 -; CHECK-BE-P9-NEXT: xxsldwi v3, f0, f0, 1 -; CHECK-BE-P9-NEXT: vsplth v2, v2, 3 -; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI10_0@toc@l +; CHECK-BE-P9-NEXT: lxv vs1, 0(r3) +; CHECK-BE-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16: ; CHECK-AIX-64-P8: # %bb.0: # %entry ; 
CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3) -; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r4 -; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 -; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 -; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1 -; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C10(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C9(r2) # %const.0 ; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r4 -; CHECK-AIX-64-P9-NEXT: xxsldwi v3, f0, f0, 1 -; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 -; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) -; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 -; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r4 -; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 -; CHECK-AIX-32-P8-NEXT: xxspltw v2, vs0, 1 -; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C11(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16: ; CHECK-AIX-32-P9: # %bb.0: # %entry -; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3) -; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxvwsx v3, 0, r4 -; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C10(r2) # %const.0 +; 
CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3) +; CHECK-AIX-32-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-AIX-32-P9-NEXT: blr entry: %0 = load <2 x i8>, ptr %a @@ -1167,20 +1145,16 @@ entry: define <16 x i8> @test_v4i32_v2i64(ptr %a, ptr %b) local_unnamed_addr { ; CHECK-LE-P8-LABEL: test_v4i32_v2i64: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3 -; CHECK-LE-P8-NEXT: xxswapd v2, f0 -; CHECK-LE-P8-NEXT: lfdx f0, 0, r4 -; CHECK-LE-P8-NEXT: xxswapd v3, f0 -; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-LE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v4i32_v2i64: ; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-LE-P9-NEXT: xxswapd v2, f0 -; CHECK-LE-P9-NEXT: lfd f0, 0(r4) -; CHECK-LE-P9-NEXT: xxswapd v3, f0 -; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v4i32_v2i64: @@ -1218,7 +1192,7 @@ define <16 x i8> @test_v4i32_v2i64(ptr %a, ptr %b) local_unnamed_addr { ; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 -; CHECK-AIX-32-P8-NEXT: lwz r3, L..C9(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C12(r2) # %const.0 ; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 ; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 ; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 @@ -1227,7 +1201,7 @@ define <16 x i8> @test_v4i32_v2i64(ptr %a, ptr %b) local_unnamed_addr { ; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64: ; CHECK-AIX-32-P9: # %bb.0: # %entry ; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-32-P9-NEXT: lwz r3, L..C8(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C11(r2) # %const.0 ; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r4 ; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3) ; 
CHECK-AIX-32-P9-NEXT: xxperm v2, vs0, vs1 @@ -1310,7 +1284,7 @@ define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readon ; CHECK-AIX-64-P8-LABEL: test_v2i64_v2i64: ; CHECK-AIX-64-P8: # %bb.0: # %entry ; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r3 -; CHECK-AIX-64-P8-NEXT: ld r3, L..C10(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C11(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4 ; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r3 ; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4 @@ -1322,7 +1296,7 @@ define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readon ; CHECK-AIX-64-P9-LABEL: test_v2i64_v2i64: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3) -; CHECK-AIX-64-P9-NEXT: ld r3, L..C9(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C10(r2) # %const.0 ; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r4) ; CHECK-AIX-64-P9-NEXT: xxlxor v3, v3, v3 ; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) @@ -1334,7 +1308,7 @@ define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readon ; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 -; CHECK-AIX-32-P8-NEXT: lwz r3, L..C10(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C13(r2) # %const.0 ; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 ; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 ; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4 @@ -1346,7 +1320,7 @@ define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readon ; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64: ; CHECK-AIX-32-P9: # %bb.0: # %entry ; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 -; CHECK-AIX-32-P9-NEXT: lwz r3, L..C9(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C12(r2) # %const.0 ; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r4 ; CHECK-AIX-32-P9-NEXT: xxlxor v3, v3, v3 ; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3) @@ -1366,20 +1340,16 @@ entry: define <16 x i8> @test_v2i64_v4i32(ptr %a, ptr %b) local_unnamed_addr { ; CHECK-LE-P8-LABEL: 
test_v2i64_v4i32: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3 -; CHECK-LE-P8-NEXT: xxswapd v2, f0 -; CHECK-LE-P8-NEXT: lfdx f0, 0, r4 -; CHECK-LE-P8-NEXT: xxswapd v3, f0 -; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-LE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-LE-P8-NEXT: vmrghh v2, v2, v3 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v2i64_v4i32: ; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-LE-P9-NEXT: xxswapd v2, f0 -; CHECK-LE-P9-NEXT: lfd f0, 0(r4) -; CHECK-LE-P9-NEXT: xxswapd v3, f0 -; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-LE-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v2i64_v4i32: @@ -1417,7 +1387,7 @@ define <16 x i8> @test_v2i64_v4i32(ptr %a, ptr %b) local_unnamed_addr { ; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 -; CHECK-AIX-32-P8-NEXT: lwz r3, L..C11(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C14(r2) # %const.0 ; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 ; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 ; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4 @@ -1426,7 +1396,7 @@ define <16 x i8> @test_v2i64_v4i32(ptr %a, ptr %b) local_unnamed_addr { ; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32: ; CHECK-AIX-32-P9: # %bb.0: # %entry ; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 -; CHECK-AIX-32-P9-NEXT: lwz r3, L..C10(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C13(r2) # %const.0 ; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r4 ; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3) ; CHECK-AIX-32-P9-NEXT: xxperm v2, vs0, vs1 @@ -1448,20 +1418,16 @@ define <16 x i8> @test_v2i64_v8i16(ptr %a, ptr %b) local_unnamed_addr { ; CHECK-LE-P8-LABEL: test_v2i64_v8i16: ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: lhz r3, 0(r3) -; CHECK-LE-P8-NEXT: mtfprd f0, r3 -; CHECK-LE-P8-NEXT: xxswapd v2, vs0 -; 
CHECK-LE-P8-NEXT: lfdx f0, 0, r4 -; CHECK-LE-P8-NEXT: xxswapd v3, f0 -; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-LE-P8-NEXT: mtvsrd v2, r3 +; CHECK-LE-P8-NEXT: vmrghh v2, v2, v3 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v2i64_v8i16: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 -; CHECK-LE-P9-NEXT: lfd f0, 0(r4) -; CHECK-LE-P9-NEXT: xxswapd v3, f0 -; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 -; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-LE-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v2i64_v8i16: @@ -1501,21 +1467,20 @@ define <16 x i8> @test_v2i64_v8i16(ptr %a, ptr %b) local_unnamed_addr { ; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) -; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 -; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r4 -; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 -; CHECK-AIX-32-P8-NEXT: xxspltw v2, vs0, 1 -; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C15(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16: ; CHECK-AIX-32-P9: # %bb.0: # %entry -; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3) -; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxvwsx v3, 0, r4 -; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C14(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3) +; CHECK-AIX-32-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-AIX-32-P9-NEXT: blr entry: %0 = load <2 x i8>, ptr %a