Skip to content

Commit 86175be

Browse files
committed
[AArch64] Disable Xor merging for certain neoverse cores
1 parent f2d0ef1 commit 86175be

File tree

5 files changed

+42
-4
lines changed

5 files changed

+42
-4
lines changed

llvm/lib/Target/AArch64/AArch64Features.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -897,6 +897,11 @@ def FeatureUseWzrToVecMove : SubtargetFeature<"use-wzr-to-vec-move",
897897
"UseWzrToVecMove", "true",
898898
"Move from WZR to insert 0 into vector registers">;
899899

900+
// On some processors, a 2-operand XOR uses higher-throughput paths than the
901+
// 3-operand XOR (EOR3), so it is more beneficial to keep chained XORs as
// separate instructions instead of merging them into EOR3.
902+
def FeatureDisableXorMerge : SubtargetFeature<"disable-xor-merge",
903+
"DisableXorMerge", "true",
904+
"Prevent chained XOR instructions from being merged into EOR3">;
900905
//===----------------------------------------------------------------------===//
901906
// Architectures.
902907
//

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,7 @@ def AllowMisalignedMemAccesses
436436
: Predicate<"!Subtarget->requiresStrictAlign()">;
437437

438438
def UseWzrToVecMove : Predicate<"Subtarget->useWzrToVecMove()">;
439+
// True when the subtarget does not report disableXorMerge(); used to guard
// patterns that would fold chained XORs into a single EOR3.
def AllowXorToEor3Merge : Predicate<"!Subtarget->disableXorMerge()">;
439440

440441

441442
//===----------------------------------------------------------------------===//
@@ -1817,10 +1818,12 @@ multiclass EOR3_pattern<ValueType Vec128Ty, ValueType Vec64Ty>{
18171818
dsub)>;
18181819
}
18191820

1821+
// Instantiate the EOR3 selection patterns for each 128-bit/64-bit vector type
// pair, but only when the subtarget allows XOR-to-EOR3 merging.
// NOTE(review): if this sits inside another `let Predicates = [...] in` scope
// (not visible in this hunk), confirm how the two Predicates lists combine and
// that any enclosing feature predicate is still enforced for these patterns.
// NOTE(review): AddedComplexity=1 presumably adjusts pattern priority relative
// to the plain XOR patterns; the reason is not shown here. TODO confirm.
let Predicates = [AllowXorToEor3Merge], AddedComplexity=1 in {
18201822
defm : EOR3_pattern<v16i8, v8i8>;
18211823
defm : EOR3_pattern<v8i16, v4i16>;
18221824
defm : EOR3_pattern<v4i32, v2i32>;
18231825
defm : EOR3_pattern<v2i64, v1i64>;
1826+
}
18241827

18251828
class BCAX_pattern<ValueType VecTy>
18261829
: Pat<(xor (VecTy V128:$Vn), (and (VecTy V128:$Vm), (vnot (VecTy V128:$Va)))),

llvm/lib/Target/AArch64/AArch64Processors.td

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1081,7 +1081,8 @@ def ProcessorFeatures {
10811081
FeatureCCIDX,
10821082
FeatureDotProd, FeatureFullFP16, FeatureSB, FeatureSSBS, FeatureSVE,
10831083
FeatureComplxNum, FeatureCRC, FeatureFPARMv8, FeatureJS, FeatureLSE,
1084-
FeatureNEON, FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM, FeatureFPAC];
1084+
FeatureNEON, FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM, FeatureFPAC,
1085+
FeatureDisableXorMerge];
10851086
list<SubtargetFeature> NeoverseN3 = [HasV9_2aOps, FeatureETE, FeatureFP16FML,
10861087
FeatureFullFP16, FeatureMTE, FeaturePerfMon,
10871088
FeatureRandGen, FeatureSPE, FeatureSPE_EEF,
@@ -1109,15 +1110,16 @@ def ProcessorFeatures {
11091110
FeatureCCIDX,
11101111
FeatureSHA3, FeatureSM4, FeatureDotProd, FeatureComplxNum,
11111112
FeatureCRC, FeatureJS, FeatureLSE, FeaturePAuth, FeatureRAS,
1112-
FeatureRCPC, FeatureRDM];
1113+
FeatureRCPC, FeatureRDM, FeatureDisableXorMerge];
11131114
list<SubtargetFeature> NeoverseV2 = [HasV9_0aOps, FeatureBF16, FeatureSPE,
11141115
FeaturePerfMon, FeatureETE, FeatureMatMulInt8,
11151116
FeatureNEON, FeatureSVEBitPerm, FeatureFP16FML,
11161117
FeatureMTE, FeatureRandGen,
11171118
FeatureCCIDX,
11181119
FeatureSVE, FeatureSVE2, FeatureSSBS, FeatureFullFP16, FeatureDotProd,
11191120
FeatureComplxNum, FeatureCRC, FeatureFPARMv8, FeatureJS, FeatureLSE,
1120-
FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM, FeatureFPAC];
1121+
FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM, FeatureFPAC,
1122+
FeatureDisableXorMerge];
11211123
list<SubtargetFeature> NeoverseV3 = [HasV9_2aOps, FeatureETE, FeatureFP16FML,
11221124
FeatureFullFP16, FeatureLS64, FeatureMTE,
11231125
FeaturePerfMon, FeatureRandGen, FeatureSPE,

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -517,7 +517,9 @@ def AArch64msb_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
517517
(vselect node:$pred, (sub node:$op3, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op1)), node:$op1)]>;
518518
// Matches a three-operand XOR expressed either as the SVE eor3 intrinsic or as
// two nested xor nodes. The predicate code accepts any root node whose opcode
// is not ISD::XOR unconditionally, and accepts an ISD::XOR root only when
// Subtarget->disableXorMerge() is false, so the plain-xor form is not folded
// on subtargets that disable XOR merging.
def AArch64eor3 : PatFrags<(ops node:$op1, node:$op2, node:$op3),
519519
[(int_aarch64_sve_eor3 node:$op1, node:$op2, node:$op3),
520-
(xor node:$op1, (xor node:$op2, node:$op3))]>;
520+
(xor node:$op1, (xor node:$op2, node:$op3))], [{
521+
return N->getOpcode() != ISD::XOR || !Subtarget->disableXorMerge();
522+
}]>;
521523
// Matches either the SVE bcax intrinsic or the equivalent generic form
// xor(op1, and(op2, vnot(op3))). No extra predicate code is attached here.
def AArch64bcax : PatFrags<(ops node:$op1, node:$op2, node:$op3),
522524
[(int_aarch64_sve_bcax node:$op1, node:$op2, node:$op3),
523525
(xor node:$op1, (and node:$op2, (vnot node:$op3)))]>;
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub
2+
; RUN: llc -mtriple=aarch64 -mattr=+sha3,+sve2 -mcpu=neoverse-v1 < %s | FileCheck --check-prefix=CHECK-NOMERGE %s
3+
; RUN: llc -mtriple=aarch64 -mattr=+sha3,+sve2 -mcpu=neoverse-v2 < %s | FileCheck --check-prefix=CHECK-NOMERGE %s
4+
; RUN: llc -mtriple=aarch64 -mattr=+sha3,+sve2 -mcpu=neoverse-v3 < %s | FileCheck --check-prefix=CHECK-MERGE %s
5+
; RUN: llc -mtriple=aarch64 -mattr=+sha3,+sve2 -mcpu=neoverse-n2 < %s | FileCheck --check-prefix=CHECK-NOMERGE %s
6+
; RUN: llc -mtriple=aarch64 -mattr=+sha3,+sve2 -mcpu=neoverse-n3 < %s | FileCheck --check-prefix=CHECK-MERGE %s
7+
8+
; Two chained xors over three <8 x i8> arguments. Under the CHECK-NOMERGE
; prefix the expected output is two separate eor instructions; under the
; CHECK-MERGE prefix it is a single eor3 on the widened 128-bit registers.
define <8 x i8> @eor3_8x8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) {
9+
; CHECK-NOMERGE-LABEL: eor3_8x8:
10+
; CHECK-NOMERGE: // %bb.0:
11+
; CHECK-NOMERGE-NEXT: eor v1.8b, v1.8b, v2.8b
12+
; CHECK-NOMERGE-NEXT: eor v0.8b, v1.8b, v0.8b
13+
; CHECK-NOMERGE-NEXT: ret
14+
;
15+
; CHECK-MERGE-LABEL: eor3_8x8:
16+
; CHECK-MERGE: // %bb.0:
17+
; CHECK-MERGE-NEXT: // kill: def $d2 killed $d2 def $q2
18+
; CHECK-MERGE-NEXT: // kill: def $d1 killed $d1 def $q1
19+
; CHECK-MERGE-NEXT: // kill: def $d0 killed $d0 def $q0
20+
; CHECK-MERGE-NEXT: eor3 v0.16b, v1.16b, v2.16b, v0.16b
21+
; CHECK-MERGE-NEXT: // kill: def $d0 killed $d0 killed $q0
22+
; CHECK-MERGE-NEXT: ret
23+
%4 = xor <8 x i8> %1, %2
24+
%5 = xor <8 x i8> %4, %0
25+
ret <8 x i8> %5
26+
}

0 commit comments

Comments
 (0)