-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[AArch64] Add tablegen patterns for concat(extract-high, extract-high) #118286
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Your org has enabled the Graphite merge queue for merging into mainAdd the label “FP Bundles” to the PR and Graphite will automatically add it to the merge queue when it’s ready to merge. You must have a Graphite account and log in to Graphite in order to use the merge queue. Sign up using this link. |
|
@llvm/pr-subscribers-backend-aarch64 Author: David Green (davemgreen) ChangesA Full diff: https://github.com/llvm/llvm-project/pull/118286.diff 4 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index a8ba89f784c8cd..56ff7b0d3a280d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -145,8 +145,12 @@ def gi_extract_high_v4i32 :
def extract_high_v8f16 :
ComplexPattern<v4f16, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
+def extract_high_v8bf16 :
+ ComplexPattern<v4bf16, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
def extract_high_v4f32 :
ComplexPattern<v2f32, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
+def extract_high_v2f64 :
+ ComplexPattern<v1f64, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
def gi_extract_high_v8f16 :
GIComplexOperandMatcher<v4s16, "selectExtractHigh">,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 7614f6215b803c..260a57a62325ed 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7352,7 +7352,8 @@ def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which had just as well be
// INS.
-multiclass ConcatPat<ValueType DstTy, ValueType SrcTy> {
+multiclass ConcatPat<ValueType DstTy, ValueType SrcTy,
+ ComplexPattern ExtractHigh> {
def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
(INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;
@@ -7365,16 +7366,22 @@ multiclass ConcatPat<ValueType DstTy, ValueType SrcTy> {
// If the high lanes are undef we can just ignore them:
def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;
-}
-defm : ConcatPat<v2i64, v1i64>;
-defm : ConcatPat<v2f64, v1f64>;
-defm : ConcatPat<v4i32, v2i32>;
-defm : ConcatPat<v4f32, v2f32>;
-defm : ConcatPat<v8i16, v4i16>;
-defm : ConcatPat<v8f16, v4f16>;
-defm : ConcatPat<v8bf16, v4bf16>;
-defm : ConcatPat<v16i8, v8i8>;
+ // Concatting the high half of two vectos is the insert of the first
+ // into the low half of the second.
+ def : Pat<(DstTy (concat_vectors (ExtractHigh (DstTy V128:$Rn)),
+ (ExtractHigh (DstTy V128:$Rm)))),
+ (INSvi64lane V128:$Rm, (i64 0), V128:$Rn, (i64 1))>;
+}
+
+defm : ConcatPat<v2i64, v1i64, extract_high_v2i64>;
+defm : ConcatPat<v2f64, v1f64, extract_high_v2f64>;
+defm : ConcatPat<v4i32, v2i32, extract_high_v4i32>;
+defm : ConcatPat<v4f32, v2f32, extract_high_v4f32>;
+defm : ConcatPat<v8i16, v4i16, extract_high_v8i16>;
+defm : ConcatPat<v8f16, v4f16, extract_high_v8f16>;
+defm : ConcatPat<v8bf16, v4bf16, extract_high_v8bf16>;
+defm : ConcatPat<v16i8, v8i8, extract_high_v16i8>;
//----------------------------------------------------------------------------
// AdvSIMD across lanes instructions
diff --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll
index 36583b89ce5fca..0daa6e7f16202a 100644
--- a/llvm/test/CodeGen/AArch64/concat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/concat-vector.ll
@@ -385,19 +385,11 @@ entry:
}
define <8 x i16> @concat_high_high_v8i16(<8 x i16> %a_vec, <8 x i16> %b_vec) {
-; CHECK-SD-LABEL: concat_high_high_v8i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: concat_high_high_v8i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: concat_high_high_v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov v1.d[0], v0.d[1]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i3 = shufflevector <8 x i16> %a_vec, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i = shufflevector <8 x i16> %b_vec, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -406,19 +398,11 @@ entry:
}
define <8 x half> @concat_high_high_v8f16(<8 x half> %a_vec, <8 x half> %b_vec) {
-; CHECK-SD-LABEL: concat_high_high_v8f16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: concat_high_high_v8f16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: concat_high_high_v8f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov v1.d[0], v0.d[1]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i3 = shufflevector <8 x half> %a_vec, <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i = shufflevector <8 x half> %b_vec, <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -427,19 +411,11 @@ entry:
}
define <8 x bfloat> @concat_high_high_v8bf16(<8 x bfloat> %a_vec, <8 x bfloat> %b_vec) {
-; CHECK-SD-LABEL: concat_high_high_v8bf16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: concat_high_high_v8bf16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: concat_high_high_v8bf16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov v1.d[0], v0.d[1]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i3 = shufflevector <8 x bfloat> %a_vec, <8 x bfloat> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i = shufflevector <8 x bfloat> %b_vec, <8 x bfloat> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -455,9 +431,8 @@ define <4 x i32> @concat_high_high_v4i32(<4 x i32> %a_vec, <4 x i32> %b_vec) {
;
; CHECK-GI-LABEL: concat_high_high_v4i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: mov v1.d[0], v0.d[1]
+; CHECK-GI-NEXT: mov v0.16b, v1.16b
; CHECK-GI-NEXT: ret
entry:
%shuffle.i3 = shufflevector <4 x i32> %a_vec, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
@@ -474,9 +449,8 @@ define <4 x float> @concat_high_high_v4f32(<4 x float> %a_vec, <4 x float> %b_ve
;
; CHECK-GI-LABEL: concat_high_high_v4f32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: mov v1.d[0], v0.d[1]
+; CHECK-GI-NEXT: mov v0.16b, v1.16b
; CHECK-GI-NEXT: ret
entry:
%shuffle.i3 = shufflevector <4 x float> %a_vec, <4 x float> poison, <2 x i32> <i32 2, i32 3>
@@ -486,19 +460,11 @@ entry:
}
define <16 x i8> @concat_high_high_v16i8(<16 x i8> %a_vec, <16 x i8> %b_vec) {
-; CHECK-SD-LABEL: concat_high_high_v16i8:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: concat_high_high_v16i8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: concat_high_high_v16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov v1.d[0], v0.d[1]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i3 = shufflevector <16 x i8> %a_vec, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%shuffle.i = shufflevector <16 x i8> %b_vec, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index 184aa0226fe774..8473f45f6c803b 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -4885,8 +4885,7 @@ entry:
define i32 @extract_hi_hi(<8 x i16> %a) {
; CHECK-SD-LABEL: extract_hi_hi:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: mov v0.d[1], v0.d[0]
+; CHECK-SD-NEXT: mov v0.d[0], v0.d[1]
; CHECK-SD-NEXT: uaddlv s0, v0.8h
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
|
nasherm
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
A `concat(extract-high(x), extract-high(y))` is the top half of x inserted into the bottom half of y. This patch adds a tablegen pattern to make sure that we generate a single i64 lane insert.
46b06b3 to
b5d8b9f
Compare
A
concat(extract-high(x), extract-high(y))is the top half of x inserted into the bottom half of y. This patch adds a tablegen pattern to make sure that we generate a single i64 lane insert.