@@ -42365,21 +42365,23 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4236542365 // If we're permuting the upper 256-bits subvectors of a concatenation, then
4236642366 // see if we can peek through and access the subvector directly.
4236742367 if (VT.is512BitVector()) {
42368- // 512-bit mask uses 4 x i2 indices - if the msb is always set then only the
42369- // upper subvector is used.
42368+ // 512-bit mask uses 4 x i2 indices - if the msb is always set then only
42369+ // the upper subvector is used.
4237042370 SDValue LHS = N->getOperand(0);
4237142371 SDValue RHS = N->getOperand(1);
4237242372 uint64_t Mask = N->getConstantOperandVal(2);
4237342373 SmallVector<SDValue> LHSOps, RHSOps;
4237442374 SDValue NewLHS, NewRHS;
42375- if ((Mask & 0x0A) == 0x0A &&
42376- collectConcatOps(LHS.getNode(), LHSOps, DAG) && LHSOps.size( ) == 2 ) {
42375+ if (collectConcatOps(peekThroughBitcasts(LHS).getNode(), LHSOps, DAG) &&
42376+ LHSOps.size() == 2 && (Mask & 0x0A ) == 0x0A ) {
4237742377 NewLHS = widenSubVector(LHSOps[1], false, Subtarget, DAG, DL, 512);
42378+ NewLHS = DAG.getBitcast(VT, NewLHS);
4237842379 Mask &= ~0x0A;
4237942380 }
42380- if ((Mask & 0xA0) == 0xA0 &&
42381- collectConcatOps(RHS.getNode(), RHSOps, DAG) && RHSOps.size( ) == 2 ) {
42381+ if (collectConcatOps(peekThroughBitcasts(RHS).getNode(), RHSOps, DAG) &&
42382+ RHSOps.size() == 2 && (Mask & 0xA0 ) == 0xA0 ) {
4238242383 NewRHS = widenSubVector(RHSOps[1], false, Subtarget, DAG, DL, 512);
42384+ NewRHS = DAG.getBitcast(VT, NewRHS);
4238342385 Mask &= ~0xA0;
4238442386 }
4238542387 if (NewLHS || NewRHS)
0 commit comments