@@ -7936,12 +7936,26 @@ GenTree* Compiler::gtNewAllBitsSetConNode(var_types type)
79367936
79377937 switch (type)
79387938 {
7939+ case TYP_BYTE:
7940+ case TYP_UBYTE:
7941+ {
7942+ return gtNewIconNode(0xFF);
7943+ }
7944+
7945+ case TYP_SHORT:
7946+ case TYP_USHORT:
7947+ {
7948+ return gtNewIconNode(0xFFFF);
7949+ }
7950+
79397951 case TYP_INT:
7952+ case TYP_UINT:
79407953 {
79417954 return gtNewIconNode(-1);
79427955 }
79437956
79447957 case TYP_LONG:
7958+ case TYP_ULONG:
79457959 {
79467960 return gtNewLconNode(-1);
79477961 }
@@ -20925,8 +20939,6 @@ GenTree* Compiler::gtNewSimdBinOpNode(
2092520939
2092620940 unsigned shiftCountMask = (genTypeSize(simdBaseType) * 8) - 1;
2092720941
20928- GenTree* nonConstantByteShiftCountOp = NULL;
20929-
2093020942 if (op2->IsCnsIntOrI())
2093120943 {
2093220944 op2->AsIntCon()->gtIconVal &= shiftCountMask;
@@ -21090,39 +21102,73 @@ GenTree* Compiler::gtNewSimdBinOpNode(
2109021102 }
2109121103
2109221104#if defined(TARGET_XARCH)
21093- case GT_RSZ:
2109421105 case GT_LSH:
21106+ case GT_RSH:
21107+ case GT_RSZ:
2109521108 {
21096- // We don't have actual instructions for shifting bytes, so we'll emulate them
21097- // by shifting 32-bit values and masking off the bits that should be zeroed.
21109+ // This emulates byte shift instructions, which don't exist in x86 SIMD,
21110+ // plus arithmetic shift of qwords, which did not exist before AVX-512.
2109821111
21099- assert(varTypeIsByte(simdBaseType));
21112+ assert(varTypeIsByte(simdBaseType) || (varTypeIsLong(simdBaseType) && (op == GT_RSH)) );
2110021113
21101- intrinsic =
21102- GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(this, op, op1, op2ForLookup, TYP_INT, simdSize, false);
21114+ // We will emulate arithmetic shift by using logical shift and then masking in the sign bits.
21115+ genTreeOps instrOp = op == GT_RSH ? GT_RSZ : op;
21116+ intrinsic = GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(this, instrOp, op1, op2ForLookup,
21117+ genActualType(simdBaseType), simdSize, false);
2110321118 assert(intrinsic != NI_Illegal);
2110421119
2110521120 GenTree* maskAmountOp;
2110621121
2110721122 if (op2->IsCnsIntOrI())
2110821123 {
2110921124 ssize_t shiftCount = op2->AsIntCon()->gtIconVal;
21110- ssize_t mask = op == GT_RSZ ? (255 >> shiftCount) : ((255 << shiftCount) & 0xFF);
21111-
21112- maskAmountOp = gtNewIconNode(mask, type);
21125+ if (varTypeIsByte(simdBaseType))
21126+ {
21127+ ssize_t mask = op == GT_LSH ? ((0xFF << shiftCount) & 0xFF) : (0xFF >> shiftCount);
21128+ maskAmountOp = gtNewIconNode(mask, type);
21129+ }
21130+ else
21131+ {
21132+ int64_t mask = static_cast<int64_t>(0xFFFFFFFFFFFFFFFFULL >> shiftCount);
21133+ maskAmountOp = gtNewLconNode(mask);
21134+ }
2111321135 }
2111421136 else
2111521137 {
2111621138 assert(op2->OperIsHWIntrinsic(NI_Vector128_CreateScalar));
2111721139
21118- GenTree* nonConstantByteShiftCountOp = fgMakeMultiUse(&op2->AsHWIntrinsic()->Op(1));
21119- maskAmountOp = gtNewOperNode(op, TYP_INT, gtNewIconNode(255), nonConstantByteShiftCountOp);
21140+ GenTree* shiftCountDup = fgMakeMultiUse(&op2->AsHWIntrinsic()->Op(1));
21141+ if (op == GT_RSH)
21142+ {
21143+ // For arithmetic shift, we will be using ConditionalSelect to mask in the sign bits, which means
21144+ // the mask will be evaluated before the shift. We swap the copied operand with the shift amount
21145+ // operand here in order to preserve correct evaluation order for the masked shift count.
21146+ std::swap(shiftCountDup, op2->AsHWIntrinsic()->Op(1));
21147+ }
21148+
21149+ maskAmountOp = gtNewOperNode(instrOp, genActualType(simdBaseType), gtNewAllBitsSetConNode(simdBaseType),
21150+ shiftCountDup);
2112021151 }
2112121152
21122- GenTree* shiftOp = gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, CORINFO_TYPE_INT, simdSize);
21123- GenTree* maskOp = gtNewSimdCreateBroadcastNode(type, maskAmountOp, simdBaseJitType, simdSize);
21153+ if (op == GT_RSH)
21154+ {
21155+ GenTree* op1Dup = fgMakeMultiUse(&op1);
21156+ GenTree* signOp =
21157+ gtNewSimdCmpOpNode(GT_GT, type, gtNewZeroConNode(type), op1Dup, simdBaseJitType, simdSize);
21158+
21159+ CorInfoType shiftType = varTypeIsSmall(simdBaseType) ? CORINFO_TYPE_INT : simdBaseJitType;
21160+ GenTree* shiftOp = gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, shiftType, simdSize);
21161+ GenTree* maskOp = gtNewSimdCreateBroadcastNode(type, maskAmountOp, simdBaseJitType, simdSize);
21162+
21163+ return gtNewSimdCndSelNode(type, maskOp, shiftOp, signOp, simdBaseJitType, simdSize);
21164+ }
21165+ else
21166+ {
21167+ GenTree* shiftOp = gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, CORINFO_TYPE_INT, simdSize);
21168+ GenTree* maskOp = gtNewSimdCreateBroadcastNode(type, maskAmountOp, simdBaseJitType, simdSize);
2112421169
21125- return gtNewSimdBinOpNode(GT_AND, type, shiftOp, maskOp, simdBaseJitType, simdSize);
21170+ return gtNewSimdBinOpNode(GT_AND, type, shiftOp, maskOp, simdBaseJitType, simdSize);
21171+ }
2112621172 }
2112721173#endif // TARGET_XARCH
2112821174
0 commit comments