Skip to content

Commit 0b64041

Browse files
Implement AddSaturate, SubtractSaturate and NarrowWithSaturation on the Vector types (#115525)
* Implement AddSaturate, SubtractSaturate and NarrowWithSaturation on the Vector types * Apply formatting patch * Ensure the add test uses + instead of - * Ensure the SubtractSaturate fallback returns MinValue for unsigned * Ensure AddSaturate/SubtractSaturate make the result multi-use * Use the named constants to ensure sign extension works * Ensure tmp and tmpDup1 are passed in the correct order for Add/SubtractSaturate * Remove unnecessary assert
1 parent 03635a7 commit 0b64041

File tree

19 files changed

+3298
-806
lines changed

19 files changed

+3298
-806
lines changed

src/coreclr/jit/gentree.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25383,14 +25383,11 @@ GenTree* Compiler::gtNewSimdNarrowNode(
2538325383
else
2538425384
{
2538525385
// var tmp1 = op1.ToVector128Unsafe();
25386-
// var tmp2 = AdvSimd.InsertScalar(tmp1.AsUInt64(), 1, op2.AsUInt64()).As<T>(); - signed integer use int64,
25387-
// unsigned integer use uint64
25386+
// var tmp2 = tmp1.WithUpper(op2);
2538825387
// return AdvSimd.ExtractNarrowingLower(tmp2);
2538925388

25390-
CorInfoType tmp2BaseJitType = varTypeIsSigned(simdBaseType) ? CORINFO_TYPE_LONG : CORINFO_TYPE_ULONG;
25391-
2539225389
tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector64_ToVector128Unsafe, simdBaseJitType, simdSize);
25393-
tmp2 = gtNewSimdWithUpperNode(TYP_SIMD16, tmp1, op2, tmp2BaseJitType, 16);
25390+
tmp2 = gtNewSimdWithUpperNode(TYP_SIMD16, tmp1, op2, simdBaseJitType, 16);
2539425391

2539525392
return gtNewSimdHWIntrinsicNode(type, tmp2, NI_AdvSimd_ExtractNarrowingLower, simdBaseJitType, simdSize);
2539625393
}

src/coreclr/jit/hwintrinsicarm64.cpp

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -717,6 +717,32 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
717717
break;
718718
}
719719

720+
case NI_Vector64_AddSaturate:
721+
case NI_Vector128_AddSaturate:
722+
{
723+
assert(sig->numArgs == 2);
724+
725+
op2 = impSIMDPopStack();
726+
op1 = impSIMDPopStack();
727+
728+
if (varTypeIsFloating(simdBaseType))
729+
{
730+
retNode = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseJitType, simdSize);
731+
}
732+
else
733+
{
734+
intrinsic = NI_AdvSimd_AddSaturate;
735+
736+
if ((simdSize == 8) && varTypeIsLong(simdBaseType))
737+
{
738+
intrinsic = NI_AdvSimd_AddSaturateScalar;
739+
}
740+
741+
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);
742+
}
743+
break;
744+
}
745+
720746
case NI_AdvSimd_BitwiseClear:
721747
case NI_Vector64_AndNot:
722748
case NI_Vector128_AndNot:
@@ -2122,6 +2148,39 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
21222148
break;
21232149
}
21242150

2151+
case NI_Vector64_NarrowWithSaturation:
2152+
case NI_Vector128_NarrowWithSaturation:
2153+
{
2154+
assert(sig->numArgs == 2);
2155+
2156+
op2 = impSIMDPopStack();
2157+
op1 = impSIMDPopStack();
2158+
2159+
if (varTypeIsFloating(simdBaseType))
2160+
{
2161+
retNode = gtNewSimdNarrowNode(retType, op1, op2, simdBaseJitType, simdSize);
2162+
}
2163+
else if (simdSize == 16)
2164+
{
2165+
intrinsic = NI_AdvSimd_ExtractNarrowingSaturateLower;
2166+
op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op1, intrinsic, simdBaseJitType, 8);
2167+
2168+
intrinsic = NI_AdvSimd_ExtractNarrowingSaturateUpper;
2169+
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);
2170+
}
2171+
else
2172+
{
2173+
intrinsic = NI_Vector64_ToVector128Unsafe;
2174+
op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, simdBaseJitType, simdSize);
2175+
2176+
op1 = gtNewSimdWithUpperNode(TYP_SIMD16, op1, op2, simdBaseJitType, 16);
2177+
2178+
intrinsic = NI_AdvSimd_ExtractNarrowingSaturateLower;
2179+
retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
2180+
}
2181+
break;
2182+
}
2183+
21252184
case NI_Vector64_op_UnaryNegation:
21262185
case NI_Vector128_op_UnaryNegation:
21272186
{
@@ -2598,6 +2657,32 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
25982657
break;
25992658
}
26002659

2660+
case NI_Vector64_SubtractSaturate:
2661+
case NI_Vector128_SubtractSaturate:
2662+
{
2663+
assert(sig->numArgs == 2);
2664+
2665+
op2 = impSIMDPopStack();
2666+
op1 = impSIMDPopStack();
2667+
2668+
if (varTypeIsFloating(simdBaseType))
2669+
{
2670+
retNode = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseJitType, simdSize);
2671+
}
2672+
else
2673+
{
2674+
intrinsic = NI_AdvSimd_SubtractSaturate;
2675+
2676+
if ((simdSize == 8) && varTypeIsLong(simdBaseType))
2677+
{
2678+
intrinsic = NI_AdvSimd_SubtractSaturateScalar;
2679+
}
2680+
2681+
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);
2682+
}
2683+
break;
2684+
}
2685+
26012686
case NI_Vector64_Sum:
26022687
case NI_Vector128_Sum:
26032688
{

src/coreclr/jit/hwintrinsiclistarm64.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
// Vector64 Intrinsics
1818
#define FIRST_NI_Vector64 NI_Vector64_Abs
1919
HARDWARE_INTRINSIC(Vector64, Abs, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
20+
HARDWARE_INTRINSIC(Vector64, AddSaturate, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
2021
HARDWARE_INTRINSIC(Vector64, AndNot, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
2122
HARDWARE_INTRINSIC(Vector64, As, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
2223
HARDWARE_INTRINSIC(Vector64, AsByte, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
@@ -88,6 +89,7 @@ HARDWARE_INTRINSIC(Vector64, Min,
8889
HARDWARE_INTRINSIC(Vector64, MinNative, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
8990
HARDWARE_INTRINSIC(Vector64, MultiplyAddEstimate, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
9091
HARDWARE_INTRINSIC(Vector64, Narrow, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
92+
HARDWARE_INTRINSIC(Vector64, NarrowWithSaturation, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
9193
HARDWARE_INTRINSIC(Vector64, Round, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
9294
HARDWARE_INTRINSIC(Vector64, ShiftLeft, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
9395
HARDWARE_INTRINSIC(Vector64, Shuffle, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
@@ -97,6 +99,7 @@ HARDWARE_INTRINSIC(Vector64, Sqrt,
9799
HARDWARE_INTRINSIC(Vector64, StoreAligned, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
98100
HARDWARE_INTRINSIC(Vector64, StoreAlignedNonTemporal, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
99101
HARDWARE_INTRINSIC(Vector64, StoreUnsafe, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
102+
HARDWARE_INTRINSIC(Vector64, SubtractSaturate, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
100103
HARDWARE_INTRINSIC(Vector64, Sum, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
101104
HARDWARE_INTRINSIC(Vector64, ToScalar, 8, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen)
102105
HARDWARE_INTRINSIC(Vector64, ToVector128, 8, 1, {INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov}, HW_Category_SIMD, HW_Flag_SpecialCodeGen)
@@ -133,6 +136,7 @@ HARDWARE_INTRINSIC(Vector64, op_UnsignedRightShift,
133136
// Vector128 Intrinsics
134137
#define FIRST_NI_Vector128 NI_Vector128_Abs
135138
HARDWARE_INTRINSIC(Vector128, Abs, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
139+
HARDWARE_INTRINSIC(Vector128, AddSaturate, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
136140
HARDWARE_INTRINSIC(Vector128, AndNot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
137141
HARDWARE_INTRINSIC(Vector128, As, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
138142
HARDWARE_INTRINSIC(Vector128, AsByte, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
@@ -212,6 +216,7 @@ HARDWARE_INTRINSIC(Vector128, Min,
212216
HARDWARE_INTRINSIC(Vector128, MinNative, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
213217
HARDWARE_INTRINSIC(Vector128, MultiplyAddEstimate, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
214218
HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
219+
HARDWARE_INTRINSIC(Vector128, NarrowWithSaturation, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
215220
HARDWARE_INTRINSIC(Vector128, Round, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
216221
HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
217222
HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
@@ -221,6 +226,7 @@ HARDWARE_INTRINSIC(Vector128, Sqrt,
221226
HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
222227
HARDWARE_INTRINSIC(Vector128, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
223228
HARDWARE_INTRINSIC(Vector128, StoreUnsafe, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
229+
HARDWARE_INTRINSIC(Vector128, SubtractSaturate, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
224230
HARDWARE_INTRINSIC(Vector128, Sum, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
225231
HARDWARE_INTRINSIC(Vector128, ToScalar, 16, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen)
226232
HARDWARE_INTRINSIC(Vector128, Truncate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)

0 commit comments

Comments
 (0)