Skip to content

Commit 7a9ebc3

Browse files
[LLVM][AArch64] Add "u" variants of SVE saturating/rounding shift left intrinsics.
This follows the approach already used for the regular shift intrinsics: the "don't care about the result of inactive lanes" property of the associated _x builtins is preserved by mapping them to the new "_u" intrinsics rather than to the merging ones. As a result, we gain the ability to switch between the reversed and movprfx variants of an instruction, depending on which best fits register allocation.
1 parent 0e6d612 commit 7a9ebc3

File tree

11 files changed

+1418
-567
lines changed

11 files changed

+1418
-567
lines changed

clang/include/clang/Basic/arm_sve.td

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1293,25 +1293,25 @@ defm SVRSQRTE : SInstZPZ<"svrsqrte", "Ui", "aarch64_sve_ursqrte">;
12931293

12941294
//------------------------------------------------------------------------------
12951295

1296-
multiclass SInstZPZxZ<string name, string types, string pat_v, string pat_n, string intrinsic, list<FlagType> flags=[]> {
1297-
def _M : SInst<name # "[_{d}]", pat_v, types, MergeOp1, intrinsic, flags>;
1298-
def _X : SInst<name # "[_{d}]", pat_v, types, MergeAny, intrinsic, flags>;
1299-
def _Z : SInst<name # "[_{d}]", pat_v, types, MergeZero, intrinsic, flags>;
1296+
multiclass SInstZPZxZ<string name, string types, string pat_v, string pat_n, string m_intrinsic, string x_intrinsic, list<FlagType> flags=[]> {
1297+
def _M : SInst<name # "[_{d}]", pat_v, types, MergeOp1, m_intrinsic, flags>;
1298+
def _X : SInst<name # "[_{d}]", pat_v, types, MergeAny, x_intrinsic, flags>;
1299+
def _Z : SInst<name # "[_{d}]", pat_v, types, MergeZero, m_intrinsic, flags>;
13001300

1301-
def _N_M : SInst<name # "[_n_{d}]", pat_n, types, MergeOp1, intrinsic, flags>;
1302-
def _N_X : SInst<name # "[_n_{d}]", pat_n, types, MergeAny, intrinsic, flags>;
1303-
def _N_Z : SInst<name # "[_n_{d}]", pat_n, types, MergeZero, intrinsic, flags>;
1301+
def _N_M : SInst<name # "[_n_{d}]", pat_n, types, MergeOp1, m_intrinsic, flags>;
1302+
def _N_X : SInst<name # "[_n_{d}]", pat_n, types, MergeAny, x_intrinsic, flags>;
1303+
def _N_Z : SInst<name # "[_n_{d}]", pat_n, types, MergeZero, m_intrinsic, flags>;
13041304
}
13051305

13061306
let SVETargetGuard = "sve2|sme" in {
1307-
defm SVQRSHL_S : SInstZPZxZ<"svqrshl", "csil", "dPdx", "dPdK", "aarch64_sve_sqrshl", [VerifyRuntimeMode]>;
1308-
defm SVQRSHL_U : SInstZPZxZ<"svqrshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_uqrshl", [VerifyRuntimeMode]>;
1309-
defm SVQSHL_S : SInstZPZxZ<"svqshl", "csil", "dPdx", "dPdK", "aarch64_sve_sqshl", [VerifyRuntimeMode]>;
1310-
defm SVQSHL_U : SInstZPZxZ<"svqshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_uqshl", [VerifyRuntimeMode]>;
1311-
defm SVRSHL_S : SInstZPZxZ<"svrshl", "csil", "dPdx", "dPdK", "aarch64_sve_srshl", [VerifyRuntimeMode]>;
1312-
defm SVRSHL_U : SInstZPZxZ<"svrshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_urshl", [VerifyRuntimeMode]>;
1313-
defm SVSQADD : SInstZPZxZ<"svsqadd", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_usqadd", [VerifyRuntimeMode]>;
1314-
defm SVUQADD : SInstZPZxZ<"svuqadd", "csil", "dPdu", "dPdL", "aarch64_sve_suqadd", [VerifyRuntimeMode]>;
1307+
defm SVQRSHL_S : SInstZPZxZ<"svqrshl", "csil", "dPdx", "dPdK", "aarch64_sve_sqrshl", "aarch64_sve_sqrshl_u", [VerifyRuntimeMode]>;
1308+
defm SVQRSHL_U : SInstZPZxZ<"svqrshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_uqrshl", "aarch64_sve_uqrshl_u", [VerifyRuntimeMode]>;
1309+
defm SVQSHL_S : SInstZPZxZ<"svqshl", "csil", "dPdx", "dPdK", "aarch64_sve_sqshl", "aarch64_sve_sqshl_u", [VerifyRuntimeMode]>;
1310+
defm SVQSHL_U : SInstZPZxZ<"svqshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_uqshl", "aarch64_sve_uqshl_u", [VerifyRuntimeMode]>;
1311+
defm SVRSHL_S : SInstZPZxZ<"svrshl", "csil", "dPdx", "dPdK", "aarch64_sve_srshl", "aarch64_sve_srshl_u", [VerifyRuntimeMode]>;
1312+
defm SVRSHL_U : SInstZPZxZ<"svrshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_urshl", "aarch64_sve_urshl_u", [VerifyRuntimeMode]>;
1313+
defm SVSQADD : SInstZPZxZ<"svsqadd", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_usqadd", "aarch64_sve_usqadd", [VerifyRuntimeMode]>;
1314+
defm SVUQADD : SInstZPZxZ<"svuqadd", "csil", "dPdu", "dPdL", "aarch64_sve_suqadd", "aarch64_sve_suqadd", [VerifyRuntimeMode]>;
13151315

13161316
def SVABA_S : SInst<"svaba[_{d}]", "dddd", "csil" , MergeNone, "aarch64_sve_saba", [VerifyRuntimeMode]>;
13171317
def SVABA_U : SInst<"svaba[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uaba", [VerifyRuntimeMode]>;

clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_qrshl.c

Lines changed: 32 additions & 32 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_qshl.c

Lines changed: 32 additions & 32 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_rshl.c

Lines changed: 32 additions & 32 deletions
Large diffs are not rendered by default.

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2449,14 +2449,17 @@ def int_aarch64_sve_sqrdmlsh_lane : AdvSIMD_3VectorArgIndexed_Intrinsic<[IntrSpe
24492449
def int_aarch64_sve_sqrdmulh : AdvSIMD_2VectorArg_Intrinsic<[IntrSpeculatable]>;
24502450
def int_aarch64_sve_sqrdmulh_lane : AdvSIMD_2VectorArgIndexed_Intrinsic<[IntrSpeculatable]>;
24512451
def int_aarch64_sve_sqrshl : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
2452+
def int_aarch64_sve_sqrshl_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
24522453
def int_aarch64_sve_sqshl : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
2454+
def int_aarch64_sve_sqshl_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
24532455
def int_aarch64_sve_sqshlu : AdvSIMD_SVE_ShiftByImm_Intrinsic<[IntrSpeculatable]>;
24542456
def int_aarch64_sve_sqsub : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
24552457
def int_aarch64_sve_sqsub_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
24562458
def int_aarch64_sve_sqsubr : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
24572459
def int_aarch64_sve_srhadd : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
24582460
def int_aarch64_sve_sri : AdvSIMD_2VectorArgIndexed_Intrinsic<[IntrSpeculatable]>;
24592461
def int_aarch64_sve_srshl : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
2462+
def int_aarch64_sve_srshl_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
24602463
def int_aarch64_sve_srshr : AdvSIMD_SVE_ShiftByImm_Intrinsic<[IntrSpeculatable]>;
24612464
def int_aarch64_sve_srsra : AdvSIMD_2VectorArgIndexed_Intrinsic<[IntrSpeculatable]>;
24622465
def int_aarch64_sve_ssra : AdvSIMD_2VectorArgIndexed_Intrinsic<[IntrSpeculatable]>;
@@ -2467,13 +2470,16 @@ def int_aarch64_sve_uhsub : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpecul
24672470
def int_aarch64_sve_uhsubr : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
24682471
def int_aarch64_sve_uqadd : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
24692472
def int_aarch64_sve_uqrshl : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
2473+
def int_aarch64_sve_uqrshl_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
24702474
def int_aarch64_sve_uqshl : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
2475+
def int_aarch64_sve_uqshl_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
24712476
def int_aarch64_sve_uqsub : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
24722477
def int_aarch64_sve_uqsub_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
24732478
def int_aarch64_sve_uqsubr : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
24742479
def int_aarch64_sve_urecpe : AdvSIMD_Merged1VectorArg_Intrinsic<[IntrSpeculatable]>;
24752480
def int_aarch64_sve_urhadd : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
24762481
def int_aarch64_sve_urshl : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
2482+
def int_aarch64_sve_urshl_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
24772483
def int_aarch64_sve_urshr : AdvSIMD_SVE_ShiftByImm_Intrinsic<[IntrSpeculatable]>;
24782484
def int_aarch64_sve_ursqrte : AdvSIMD_Merged1VectorArg_Intrinsic<[IntrSpeculatable]>;
24792485
def int_aarch64_sve_ursra : AdvSIMD_2VectorArgIndexed_Intrinsic<[IntrSpeculatable]>;

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3885,12 +3885,12 @@ let Predicates = [HasSVE2_or_SME] in {
38853885
defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr", null_frag, "SQRSHLR_ZPZZ", DestructiveBinaryCommWithRev, "SQRSHL_ZPmZ", /*isReverseInstr*/ 1>;
38863886
defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr", null_frag, "UQRSHLR_ZPZZ", DestructiveBinaryCommWithRev, "UQRSHL_ZPmZ", /*isReverseInstr*/ 1>;
38873887

3888-
defm SRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_srshl>;
3889-
defm URSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_urshl>;
3890-
defm SQSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_sqshl>;
3891-
defm UQSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_uqshl>;
3892-
defm SQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_sqrshl>;
3893-
defm UQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_uqrshl>;
3888+
defm SRSHL_ZPZZ : sve_int_bin_pred_bhsd<int_aarch64_sve_srshl_u>;
3889+
defm URSHL_ZPZZ : sve_int_bin_pred_bhsd<int_aarch64_sve_urshl_u>;
3890+
defm SQSHL_ZPZZ : sve_int_bin_pred_bhsd<int_aarch64_sve_sqshl_u>;
3891+
defm UQSHL_ZPZZ : sve_int_bin_pred_bhsd<int_aarch64_sve_uqshl_u>;
3892+
defm SQRSHL_ZPZZ : sve_int_bin_pred_bhsd<int_aarch64_sve_sqrshl_u>;
3893+
defm UQRSHL_ZPZZ : sve_int_bin_pred_bhsd<int_aarch64_sve_uqrshl_u>;
38943894
} // End HasSVE2_or_SME
38953895

38963896
let Predicates = [HasSVE2_or_SME, UseExperimentalZeroingPseudos] in {
@@ -3909,6 +3909,9 @@ let Predicates = [HasSVE2_or_SME] in {
39093909
defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right< 0b1101, "urshr", "URSHR_ZPZI", AArch64urshri_p>;
39103910
defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>;
39113911

3912+
defm SQSHL_ZPZI : sve_int_shift_pred_bhsd<int_aarch64_sve_sqshl_u, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>;
3913+
defm UQSHL_ZPZI : sve_int_shift_pred_bhsd<int_aarch64_sve_uqshl_u, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>;
3914+
39123915
// SVE2 integer add/subtract long
39133916
defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb", int_aarch64_sve_saddlb>;
39143917
defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt", int_aarch64_sve_saddlt>;

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1446,10 +1446,22 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
14461446
case Intrinsic::aarch64_sve_orr:
14471447
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_orr_u)
14481448
.setMatchingIROpcode(Instruction::Or);
1449+
case Intrinsic::aarch64_sve_sqrshl:
1450+
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sqrshl_u);
1451+
case Intrinsic::aarch64_sve_sqshl:
1452+
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sqshl_u);
14491453
case Intrinsic::aarch64_sve_sqsub:
14501454
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sqsub_u);
1455+
case Intrinsic::aarch64_sve_srshl:
1456+
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_srshl_u);
1457+
case Intrinsic::aarch64_sve_uqrshl:
1458+
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_uqrshl_u);
1459+
case Intrinsic::aarch64_sve_uqshl:
1460+
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_uqshl_u);
14511461
case Intrinsic::aarch64_sve_uqsub:
14521462
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_uqsub_u);
1463+
case Intrinsic::aarch64_sve_urshl:
1464+
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_urshl_u);
14531465

14541466
case Intrinsic::aarch64_sve_add_u:
14551467
return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(

0 commit comments

Comments
 (0)