
Commit 97ad3a7

JIT: Accelerate long -> floating casts on x86 (#113930)
* use SIMD conversion instructions for long -> floating casts
* move transform to DecomposeLongs, restore double intermediate
* formatting
* handle constants

Co-authored-by: Bruce Forstall <[email protected]>
1 parent 7fae5e0 commit 97ad3a7
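
For illustration, here is roughly what the new expansion computes, written as a standalone C++ sketch using the equivalent compiler intrinsics. This is not JIT code: the function name is made up, and it assumes AVX512DQ+VL hardware (compile with -mavx512dq -mavx512vl).

    #include <immintrin.h>
    #include <stdint.h>

    // Hypothetical scalar equivalent of the sequence the JIT now emits for
    // (float)someInt64 on 32-bit x86 when AVX512DQ+VL is available.
    float Int64ToFloatSimd(int64_t value)
    {
        // Vector128.CreateScalarUnsafe(LONG): move the 64-bit integer into
        // the low element of an XMM register.
        __m128i v = _mm_loadl_epi64((const __m128i*)&value);

        // AVX512DQ.VL.ConvertToVector128Single: vcvtqq2ps xmm, xmm.
        // (_mm_cvtepu64_ps would be the unsigned-source variant.)
        __m128 f = _mm_cvtepi64_ps(v);

        // ToScalar(): read back element 0.
        return _mm_cvtss_f32(f);
    }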

File tree: 2 files changed (+94, -22 lines)


src/coreclr/jit/decomposelongs.cpp

Lines changed: 78 additions & 20 deletions
@@ -137,7 +137,12 @@ GenTree* DecomposeLongs::DecomposeNode(GenTree* tree)
         }
     }
 
+#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_X86)
+    if (!tree->TypeIs(TYP_LONG) &&
+        !(tree->OperIs(GT_CAST) && varTypeIsLong(tree->AsCast()->CastOp()) && varTypeIsFloating(tree)))
+#else
     if (!tree->TypeIs(TYP_LONG))
+#endif // FEATURE_HW_INTRINSICS && TARGET_X86
     {
         return tree->gtNext;
     }
@@ -157,15 +162,18 @@ GenTree* DecomposeLongs::DecomposeNode(GenTree* tree)
 
     GenTree* user = use.User();
 
-    if (user->OperIsHWIntrinsic())
+    if (tree->TypeIs(TYP_LONG) && (user->OperIsHWIntrinsic() || (user->OperIs(GT_CAST) && varTypeIsFloating(user))))
     {
         if (tree->OperIs(GT_CNS_LNG) ||
             (tree->OperIs(GT_IND, GT_LCL_FLD) && m_lowering->IsSafeToContainMem(user, tree)))
         {
-            NamedIntrinsic intrinsicId = user->AsHWIntrinsic()->GetHWIntrinsicId();
-            assert(HWIntrinsicInfo::IsVectorCreate(intrinsicId) ||
-                   HWIntrinsicInfo::IsVectorCreateScalar(intrinsicId) ||
-                   HWIntrinsicInfo::IsVectorCreateScalarUnsafe(intrinsicId));
+            if (user->OperIsHWIntrinsic())
+            {
+                NamedIntrinsic intrinsicId = user->AsHWIntrinsic()->GetHWIntrinsicId();
+                assert(HWIntrinsicInfo::IsVectorCreate(intrinsicId) ||
+                       HWIntrinsicInfo::IsVectorCreateScalar(intrinsicId) ||
+                       HWIntrinsicInfo::IsVectorCreateScalarUnsafe(intrinsicId));
+            }
 
             return tree->gtNext;
         }
@@ -562,28 +570,78 @@ GenTree* DecomposeLongs::DecomposeStoreLclFld(LIR::Use& use)
 GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use)
 {
     assert(use.IsInitialized());
-    assert(use.Def()->OperGet() == GT_CAST);
+    assert(use.Def()->OperIs(GT_CAST));
 
-    GenTree* cast     = use.Def()->AsCast();
-    GenTree* loResult = nullptr;
-    GenTree* hiResult = nullptr;
+    GenTreeCast* cast    = use.Def()->AsCast();
+    var_types    srcType = cast->CastFromType();
+    var_types    dstType = cast->CastToType();
 
-    var_types srcType = cast->CastFromType();
-    var_types dstType = cast->CastToType();
-
-    if ((cast->gtFlags & GTF_UNSIGNED) != 0)
+    if (cast->IsUnsigned())
     {
         srcType = varTypeToUnsigned(srcType);
     }
 
-    bool skipDecomposition = false;
+#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_X86)
+    if (varTypeIsFloating(dstType))
+    {
+        // We will reach this path only if morph did not convert the cast to a helper call,
+        // meaning we can perform the cast using SIMD instructions.
+        // The sequence this creates is simply:
+        //   AVX512DQ.VL.ConvertToVector128Single(Vector128.CreateScalarUnsafe(LONG)).ToScalar()
+
+        NamedIntrinsic intrinsicId      = NI_Illegal;
+        GenTree*       srcOp            = cast->CastOp();
+        var_types      dstType          = cast->CastToType();
+        CorInfoType    baseFloatingType = (dstType == TYP_FLOAT) ? CORINFO_TYPE_FLOAT : CORINFO_TYPE_DOUBLE;
+        CorInfoType    baseIntegralType = cast->IsUnsigned() ? CORINFO_TYPE_ULONG : CORINFO_TYPE_LONG;
+
+        assert(!cast->gtOverflow());
+
+        if (m_compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ_VL))
+        {
+            intrinsicId = (dstType == TYP_FLOAT) ? NI_AVX512DQ_VL_ConvertToVector128Single
+                                                 : NI_AVX512DQ_VL_ConvertToVector128Double;
+        }
+        else
+        {
+            assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX10v1));
+            intrinsicId =
+                (dstType == TYP_FLOAT) ? NI_AVX10v1_ConvertToVector128Single : NI_AVX10v1_ConvertToVector128Double;
+        }
+
+        GenTree* createScalar = m_compiler->gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, srcOp, baseIntegralType, 16);
+        GenTree* convert =
+            m_compiler->gtNewSimdHWIntrinsicNode(TYP_SIMD16, createScalar, intrinsicId, baseIntegralType, 16);
+        GenTree* toScalar = m_compiler->gtNewSimdToScalarNode(dstType, convert, baseFloatingType, 16);
+
+        Range().InsertAfter(cast, createScalar, convert, toScalar);
+        Range().Remove(cast);
+
+        if (createScalar->IsCnsVec())
+        {
+            Range().Remove(srcOp);
+        }
+
+        if (use.IsDummyUse())
+        {
+            toScalar->SetUnusedValue();
+        }
+        use.ReplaceWith(toScalar);
+
+        return toScalar->gtNext;
+    }
+#endif // FEATURE_HW_INTRINSICS && TARGET_X86
+
+    bool     skipDecomposition = false;
+    GenTree* loResult          = nullptr;
+    GenTree* hiResult          = nullptr;
 
     if (varTypeIsLong(srcType))
    {
        if (cast->gtOverflow() && (varTypeIsUnsigned(srcType) != varTypeIsUnsigned(dstType)))
        {
-            GenTree* srcOp = cast->gtGetOp1();
-            noway_assert(srcOp->OperGet() == GT_LONG);
+            GenTree* srcOp = cast->CastOp();
+            noway_assert(srcOp->OperIs(GT_LONG));
             GenTree* loSrcOp = srcOp->gtGetOp1();
             GenTree* hiSrcOp = srcOp->gtGetOp2();
 
@@ -595,13 +653,13 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use)
             // check provided by codegen.
             //
 
-            const bool signExtend = (cast->gtFlags & GTF_UNSIGNED) == 0;
+            const bool signExtend = !cast->IsUnsigned();
             loResult = EnsureIntSized(loSrcOp, signExtend);
 
             hiResult                       = cast;
             hiResult->gtType               = TYP_INT;
             hiResult->AsCast()->gtCastType = TYP_UINT;
-            hiResult->gtFlags &= ~GTF_UNSIGNED;
+            hiResult->ClearUnsigned();
             hiResult->AsOp()->gtOp1 = hiSrcOp;
 
             Range().Remove(srcOp);
@@ -631,7 +689,7 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use)
         }
         else
         {
-            if (!use.IsDummyUse() && (use.User()->OperGet() == GT_MUL))
+            if (!use.IsDummyUse() && use.User()->OperIs(GT_MUL))
            {
                 //
                 // This int->long cast is used by a GT_MUL that will be transformed by DecomposeMul into a
@@ -646,7 +704,7 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use)
             }
             else if (varTypeIsUnsigned(srcType))
             {
-                const bool signExtend = (cast->gtFlags & GTF_UNSIGNED) == 0;
+                const bool signExtend = !cast->IsUnsigned();
                 loResult = EnsureIntSized(cast->gtGetOp1(), signExtend);
 
                 hiResult = m_compiler->gtNewZeroConNode(TYP_INT);
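
The same three-node shape covers all four source/destination combinations; baseIntegralType and baseFloatingType above merely select the intrinsic variant. As a sketch of the ulong -> double flavor (again a hypothetical standalone helper, not code from the commit):

    #include <immintrin.h>
    #include <stdint.h>

    // Hypothetical scalar equivalent of the unsigned/double variant
    // (CORINFO_TYPE_ULONG + ConvertToVector128Double), i.e. vcvtuqq2pd.
    double UInt64ToDoubleSimd(uint64_t value)
    {
        __m128i v = _mm_loadl_epi64((const __m128i*)&value); // CreateScalarUnsafe
        __m128d d = _mm_cvtepu64_pd(v);                      // ConvertToVector128Double
        return _mm_cvtsd_f64(d);                             // ToScalar
    }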

src/coreclr/jit/morph.cpp

Lines changed: 16 additions & 2 deletions
@@ -417,11 +417,16 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
     // Because there is no IL instruction conv.r4.un, uint/ulong -> float
     // casts are always imported as CAST(float <- CAST(double <- uint/ulong)).
     // We can usually eliminate the redundant intermediate cast as an optimization.
+    //
     // AArch and xarch+EVEX have instructions that can cast directly from
-    // all integers (except for longs on 32-bit of course) to floats.
+    // all integers (except for longs on ARM32) to floats.
     // On x64, we also have the option of widening uint -> long and
     // using the signed conversion instructions, and ulong -> float/double
     // is handled directly in codegen, so we can allow all casts.
+    //
+    // This logic will also catch CAST(float <- CAST(double <- float))
+    // and reduce it to CAST(float <- float), which is handled in codegen as
+    // an optional mov.
     else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && oper->OperIs(GT_CAST)
 #ifndef TARGET_64BIT
              && !varTypeIsLong(oper->AsCast()->CastOp())
@@ -481,6 +486,15 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
 #endif // TARGET_AMD64
 
 #ifdef TARGET_X86
+#ifdef FEATURE_HW_INTRINSICS
+    else if (varTypeIsLong(srcType) && varTypeIsFloating(dstType) && canUseEvexEncoding())
+    {
+        // We can handle these casts directly using SIMD instructions.
+        // The transform to SIMD is done in DecomposeLongs.
+        return nullptr;
+    }
+#endif // FEATURE_HW_INTRINSICS
+
     // Do we have to do two step U4/8 -> R4/8 ?
     else if (tree->IsUnsigned() && varTypeIsFloating(dstType))
     {
@@ -494,7 +508,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
         {
             oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG);
             oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
-            tree->gtFlags &= ~GTF_UNSIGNED;
+            tree->ClearUnsigned();
             return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
         }
     }
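
When canUseEvexEncoding() is false, the new else-if above is skipped and morph still expands these casts through CORINFO_HELP_LNG2DBL, with a float destination reached by narrowing the double intermediate (the "restore double intermediate" bullet in the commit message). A rough scalar model of that fallback, inferred from this hunk rather than quoted from the runtime:

    #include <stdint.h>

    // Hypothetical model of the helper-based fallback: long -> double goes
    // through the CORINFO_HELP_LNG2DBL helper, then a float destination
    // narrows the double intermediate.
    float Int64ToFloatViaHelper(int64_t value)
    {
        double d = (double)value; // stands in for the CORINFO_HELP_LNG2DBL call
        return (float)d;          // CAST(float <- double)
    }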
