Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/coreclr/jit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ set( JIT_ARM64_SOURCES
unwindarm64.cpp
hwintrinsicarm64.cpp
hwintrinsiccodegenarm64.cpp
morpharm64.cpp
)

set( JIT_ARMV6_SOURCES
Expand Down
7 changes: 7 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3130,6 +3130,7 @@ class Compiler

#if defined(TARGET_ARM64)
GenTree* gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigned simdSize);
GenTree* gtNewSimdAllFalseMaskNode(unsigned simdSize);
#endif

GenTree* gtNewSimdBinOpNode(genTreeOps op,
Expand Down Expand Up @@ -6683,6 +6684,12 @@ class Compiler
GenTree* fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree);
GenTree* fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node);
GenTree* fgOptimizeHWIntrinsicAssociative(GenTreeHWIntrinsic* node);
#ifdef TARGET_ARM64
bool canMorphVectorOperandToMask(GenTree* node);
bool canMorphAllVectorOperandsToMasks(GenTreeHWIntrinsic* node);
GenTree* doMorphVectorOperandToMask(GenTree* node, GenTreeHWIntrinsic* parent);
GenTree* fgMorphTryUseAllMaskVariant(GenTreeHWIntrinsic* node);
#endif // TARGET_ARM64
#endif // FEATURE_HW_INTRINSICS
GenTree* fgOptimizeCommutativeArithmetic(GenTreeOp* tree);
GenTree* fgOptimizeRelationalComparisonWithCasts(GenTreeOp* cmp);
Expand Down
19 changes: 18 additions & 1 deletion src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20569,7 +20569,24 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp)
}
}
#elif defined(TARGET_ARM64)
return HWIntrinsicInfo::HasRMWSemantics(AsHWIntrinsic()->GetHWIntrinsicId());
NamedIntrinsic id = AsHWIntrinsic()->GetHWIntrinsicId();
switch (id)
{
case NI_Sve_And:
case NI_Sve_BitwiseClear:
case NI_Sve_Xor:
case NI_Sve_Or:
// Mask variant is not RMW, but the vector variant is.
if (varTypeIsMask(this))
{
assert(AsHWIntrinsic()->GetOperandCount() == 3);
return false;
}
break;
default:
break;
}
return HWIntrinsicInfo::HasRMWSemantics(id);
#else
return false;
#endif
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -6617,6 +6617,10 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic
}
}

#if defined(TARGET_ARM64) && defined(FEATURE_MASKED_HW_INTRINSICS)
bool HasAllMaskVariant();
#endif

private:
void SetHWIntrinsicId(NamedIntrinsic intrinsicId);

Expand Down
16 changes: 15 additions & 1 deletion src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3248,7 +3248,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
}

//------------------------------------------------------------------------
// gtNewSimdEmbeddedMaskNode: Create an embedded mask
// gtNewSimdAllTrueMaskNode: Create an embedded mask with all bits set to true
//
// Arguments:
// simdBaseJitType -- the base jit type of the nodes being masked
Expand All @@ -3262,4 +3262,18 @@ GenTree* Compiler::gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigne
return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateTrueMaskAll, simdBaseJitType, simdSize);
}

//------------------------------------------------------------------------
// gtNewSimdAllFalseMaskNode: Create an embedded mask with all bits set to false
//
// Arguments:
//    simdSize -- the simd size of the nodes being masked
//
// Return Value:
//    The mask
//
GenTree* Compiler::gtNewSimdAllFalseMaskNode(unsigned simdSize)
{
    // An all-false mask is identical for every lane arrangement, so the byte
    // form (CreateFalseMaskByte / PFALSE) is used unconditionally.
    return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateFalseMaskByte, CORINFO_TYPE_UBYTE, simdSize);
}

#endif // FEATURE_HW_INTRINSICS
9 changes: 9 additions & 0 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9218,6 +9218,15 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node)
}
}

#ifdef TARGET_ARM64
optimizedTree = fgMorphTryUseAllMaskVariant(node);
if (optimizedTree != nullptr)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having it here might be preventing the node from getting further transformations/optimizations. Should this be done towards the end of this method?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems fine, I've moved it later in the method.

{
optimizedTree->SetMorphed(this);
return optimizedTree;
}
#endif

NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
var_types retType = node->TypeGet();
CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
Expand Down
206 changes: 206 additions & 0 deletions src/coreclr/jit/morpharm64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX XX
XX Arm64 Specific Morph XX
XX XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif

#ifdef FEATURE_MASKED_HW_INTRINSICS

//------------------------------------------------------------------------
// HasAllMaskVariant: Does this intrinsic have a variant where all of it's operands
// are mask types?
//
// Return Value:
// true if an all-mask variant exists for the intrinsic, else false.
//
bool GenTreeHWIntrinsic::HasAllMaskVariant()
{
switch (GetHWIntrinsicId())
{
// ZIP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T>
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wondering if we should add a HW_Flag_AllMaskVariant for this?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't want to use flag space as this list is unlikely to grow, these were the only instructions I could find that follow this pattern across all versions of SVE. But it might make it easier to apply this transform to other intrinsics in future if we find other patterns work too.

// ZIP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T>
// UZP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T>
// UZP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T>
// TRN1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T>
// TRN2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T>
// REV <Pd>.<T>, <Pn>.<T>
case NI_Sve_ZipHigh:
case NI_Sve_ZipLow:
case NI_Sve_UnzipOdd:
case NI_Sve_UnzipEven:
case NI_Sve_TransposeEven:
case NI_Sve_TransposeOdd:
case NI_Sve_ReverseElement:
return true;

default:
return false;
}
}

//------------------------------------------------------------------------
// canMorphVectorOperandToMask: Can this vector operand be converted to a
//                              node with type TYP_MASK easily?
//
// Arguments:
//    node -- The operand to check
//
// Return Value:
//    true if the operand is already a mask, is a mask-to-vector conversion
//    (whose mask source can be reused directly), or is a zero vector (which
//    is equivalent to an all-false mask); otherwise false.
//
bool Compiler::canMorphVectorOperandToMask(GenTree* node)
{
    return varTypeIsMask(node) || node->OperIsConvertMaskToVector() || node->IsVectorZero();
}

//------------------------------------------------------------------------
// canMorphAllVectorOperandsToMasks: Can every vector operand of this node
//                                   be converted to a node with type
//                                   TYP_MASK easily?
//
// Arguments:
//    node -- The HWIntrinsic whose operands are checked
//
// Return Value:
//    true only when canMorphVectorOperandToMask holds for every operand.
//
bool Compiler::canMorphAllVectorOperandsToMasks(GenTreeHWIntrinsic* node)
{
    // Bail out on the first operand that cannot be turned into a mask.
    for (size_t opIndex = 1; opIndex <= node->GetOperandCount(); opIndex++)
    {
        if (!canMorphVectorOperandToMask(node->Op(opIndex)))
        {
            return false;
        }
    }

    return true;
}

//------------------------------------------------------------------------
// doMorphVectorOperandToMask: Morph a vector node that is close to a mask
//                             node into a mask node.
//
// Arguments:
//    node   -- The vector operand to morph
//    parent -- The HWIntrinsic that will consume the resulting mask
//
// Return value:
//    The morphed tree, or nullptr if the transform is not applicable.
//
GenTree* Compiler::doMorphVectorOperandToMask(GenTree* node, GenTreeHWIntrinsic* parent)
{
    // Already a mask; use it as-is.
    if (varTypeIsMask(node))
    {
        return node;
    }

    // Peel off a mask->vector conversion and reuse its mask source.
    if (node->OperIsConvertMaskToVector())
    {
        return node->AsHWIntrinsic()->Op(1);
    }

    // A vector of zeroes is equivalent to an all-false mask.
    if (node->IsVectorZero())
    {
        GenTree* falseMask = gtNewSimdAllFalseMaskNode(parent->GetSimdSize());
        falseMask->SetMorphed(this);
        return falseMask;
    }

    // Anything else cannot be converted cheaply.
    return nullptr;
}

//-----------------------------------------------------------------------------------------------------
// fgMorphTryUseAllMaskVariant: For NamedIntrinsics that have a variant where all operands are
// mask nodes. If all operands to this node are 'suggesting' that they
// originate closely from a mask, but are of vector types, then morph the
// operands as appropriate to use mask types instead. 'Suggesting'
// is defined by the canMorphVectorOperandToMask function.
//
// Arguments:
// tree - The HWIntrinsic to try and optimize.
//
// Return Value:
// The fully morphed tree if a change was made, else nullptr.
//
GenTree* Compiler::fgMorphTryUseAllMaskVariant(GenTreeHWIntrinsic* node)
{
if (node->HasAllMaskVariant() && canMorphAllVectorOperandsToMasks(node))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It makes sense to have node->HasAllMaskVariant() inside canMorphAllVectorOperandsToMasks() itself. That way for conditional select's left operand too, you can (and should) exercise it.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with this change if all of these intrinsics have HasAllMaskVariant() == true, but I don't think this works, see my comment below.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If #114438 (comment) works, then consider tagging them with HW_Flag_AllMaskVariant, move the HasAllMaskVariant() inside canMorphAllVectorOperandsToMasks. Having HW_Flag_AllMaskVariant in table helps in easy discoverability of various flags in one place.

{
for (size_t i = 1; i <= node->GetOperandCount(); i++)
{
node->Op(i) = doMorphVectorOperandToMask(node->Op(i), node);
}

node->gtType = TYP_MASK;
return node;
}

if (node->OperIsHWIntrinsic(NI_Sve_ConditionalSelect))
{
GenTree* mask = node->Op(1);
GenTree* left = node->Op(2);
GenTree* right = node->Op(3);

if (left->OperIsHWIntrinsic())
{
assert(canMorphVectorOperandToMask(mask));

if (canMorphAllVectorOperandsToMasks(left->AsHWIntrinsic()))
{
// At this point we know the 'left' node is a HWINTRINSIC node and all of its operands look like
// mask nodes.
//
// The ConditionalSelect could be substituted for the named intrinsic in it's 'left' operand and
// transformed to a mask-type operation for some named intrinsics. Doing so will encourage codegen
// to emit predicate variants of instructions rather than vector variants, and we can lose some
// unnecessary mask->vector conversion nodes.
GenTreeHWIntrinsic* actualOp = left->AsHWIntrinsic();

switch (actualOp->GetHWIntrinsicId())
{
// AND <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B
// BIC <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B
// EOR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B
// ORR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B
case NI_Sve_And:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think these too should be marked as HW_Flag_AllMaskVariant and looked for in HasAllMaskVariant() itself.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried grouping these intrinsics with the others initially but it doesn't work because these ones should only be considered in relation to a ConditionalSelect. Grouping them with the others causes a transformation to run when a ConditionalSelect is not present, which wouldn't be correct for these instructions because they require the mask parameter for the governing predicate.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We wrap the IR nodes that has embedded mask semantics like And inside a ConditionalSelect during lowering, which runs way after the morphing phase where you are doing this optimization. See if (HWIntrinsicInfo::IsEmbeddedMaskedOperation(intrinsicId)) in LowerHWIntrinsic(). Until then, they continue to hold Vector operands. If you do the transformation here for IR nodes that has And(mask, mask), it shouldn't prohibit us from wrapping it in ConditionalSelect in lowering.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently it's marked HW_Flag_OptionalEmbeddedMaskedOperation, so I think this wrapping isn't occurring for this intrinsic? When I try to implement it like this, it changes all the operands to masks and then tries to emit AND <Zd>.D, <Zn>.D, <Zm>.D and runs into this assert because the register types are wrong:

assert(isVectorRegister(reg3)); // ddddd

The mask variant of this intrinsic has an embedded mask, but it's required for this instruction instead of optional, so there would also need to be some handling of this edge case in codegen to make sure it definitely wraps the mask variant in ConditionalSelect. It feels like there should be a separate set of flags for when the intrinsic is TYP_MASK or TYP_SIMD. E.g. HW_Flag_MaskVariant(Optional)EmbeddedMaskOperation, etc.

Copy link
Copy Markdown
Contributor

@kunalspathak kunalspathak Apr 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should have a separate intrinsics And_Predicates (and likewise for other APIs that have predicates variant). They are added in the section "Special intrinsics that are generated during importing or lowering". And_Predicates should have HW_Flag_EmbeddedMaskedOperation. We can have flag HW_Flag_AllMaskVariant on SVE_And intrinsics, to detect it in morph if this can be transformed into And_Predicates variant.

We come here in the morph and see And(Vector, Vector). If operands are mask, we can transform the node into And_Predicates(Mask, Mask). During lowering, we can then transform it into CndSel(AllTrue, And_Predicates(Mask, Mask), Zero) and codegen will handle generating the predicated version of And (predicates).

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This sounds much better than what I was thinking, I'll try and implement this.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've pushed this implementation, it seems simpler. N.B. we've run out of flag space now as I've taken the last bit for HW_Flag_HasMaskVariant. We might need to think about expanding the flag space for SVE2.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've pushed this implementation, it seems simpler. N.B. we've run out of flag space now as I've taken the last bit for HW_Flag_HasMaskVariant. We might need to think about expanding the flag space for SVE2.

Added #115474 to track this

case NI_Sve_BitwiseClear:
case NI_Sve_Xor:
case NI_Sve_Or:
if (right->IsVectorZero())
{
// The operation is equivalent for all lane arrangements, because it is a bitwise operation.
// It's safe to bash the type to 8-bit required to assemble the instruction.
actualOp->SetSimdBaseJitType(CORINFO_TYPE_BYTE);

actualOp->ResetHWIntrinsicId(actualOp->GetHWIntrinsicId(), this,
doMorphVectorOperandToMask(mask, actualOp),
doMorphVectorOperandToMask(actualOp->Op(1), actualOp),
doMorphVectorOperandToMask(actualOp->Op(2), actualOp));
actualOp->gtType = TYP_MASK;
return actualOp;
}
break;
default:
break;
}
}
}

// If we got this far, then there was no match on any predicated operation.
// ConditionalSelect itself can be a mask operation for 8-bit lane types, using
// SEL <Pd>.B, <Pg>, <Pn>.B, <Pm>.B
if (canMorphAllVectorOperandsToMasks(node))
{
for (size_t i = 1; i <= node->GetOperandCount(); i++)
{
node->Op(i) = doMorphVectorOperandToMask(node->Op(i), node);
}

// Again this operation is bitwise, so the lane arrangement doesn't matter.
// We can bash the type to 8-bit.
node->SetSimdBaseJitType(CORINFO_TYPE_BYTE);

node->gtType = TYP_MASK;
return node;
}
}

return nullptr;
}

#endif // FEATURE_MASKED_HW_INTRINSICS
8 changes: 8 additions & 0 deletions src/tests/JIT/opt/SVE/Directory.Build.props
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="utf-8"?>
<Project>
<Import Project="$([MSBuild]::GetPathOfFileAbove(Directory.Build.props, $(MSBuildThisFileDirectory)..))" />
<PropertyGroup>
<NoWarn>$(NoWarn);SYSLIB5003</NoWarn>
<CLRTestTargetUnsupported Condition="'$(TargetArchitecture)' != 'arm64' or '$(TargetOS)' == 'osx' or '$(RuntimeFlavor)' == 'mono'">true</CLRTestTargetUnsupported>
</PropertyGroup>
</Project>
Loading
Loading