Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,12 @@ def gi_extract_high_v4i32 :

// Complex patterns for the floating-point "extract high" operands. Each one
// yields the half-width vector type named in its first template argument
// (v4f16, v4bf16, v2f32, v1f64), takes a single operand, is matched by the
// C++ selector "SelectExtractHigh", and lists extract_subvector and bitconvert
// as its root nodes.
// NOTE(review): presumably these match the upper 64 bits of a 128-bit vector,
// as the names suggest; confirm against SelectExtractHigh.
def extract_high_v8f16 :
ComplexPattern<v4f16, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
def extract_high_v8bf16 :
ComplexPattern<v4bf16, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
def extract_high_v4f32 :
ComplexPattern<v2f32, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
def extract_high_v2f64 :
ComplexPattern<v1f64, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;

def gi_extract_high_v8f16 :
GIComplexOperandMatcher<v4s16, "selectExtractHigh">,
Expand Down
27 changes: 17 additions & 10 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -7352,7 +7352,8 @@ def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which may just as well
// be INS.
multiclass ConcatPat<ValueType DstTy, ValueType SrcTy> {
multiclass ConcatPat<ValueType DstTy, ValueType SrcTy,
ComplexPattern ExtractHigh> {
def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
(INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;
Expand All @@ -7365,16 +7366,22 @@ multiclass ConcatPat<ValueType DstTy, ValueType SrcTy> {
// If the high lanes are undef we can just ignore them:
def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;
}

defm : ConcatPat<v2i64, v1i64>;
defm : ConcatPat<v2f64, v1f64>;
defm : ConcatPat<v4i32, v2i32>;
defm : ConcatPat<v4f32, v2f32>;
defm : ConcatPat<v8i16, v4i16>;
defm : ConcatPat<v8f16, v4f16>;
defm : ConcatPat<v8bf16, v4bf16>;
defm : ConcatPat<v16i8, v8i8>;
// Concatenating the high halves of two vectors is equivalent to inserting the
// high half of the first into the low half of the second.
def : Pat<(DstTy (concat_vectors (ExtractHigh (DstTy V128:$Rn)),
(ExtractHigh (DstTy V128:$Rm)))),
(INSvi64lane V128:$Rm, (i64 0), V128:$Rn, (i64 1))>;
}

defm : ConcatPat<v2i64, v1i64, extract_high_v2i64>;
defm : ConcatPat<v2f64, v1f64, extract_high_v2f64>;
defm : ConcatPat<v4i32, v2i32, extract_high_v4i32>;
defm : ConcatPat<v4f32, v2f32, extract_high_v4f32>;
defm : ConcatPat<v8i16, v4i16, extract_high_v8i16>;
defm : ConcatPat<v8f16, v4f16, extract_high_v8f16>;
defm : ConcatPat<v8bf16, v4bf16, extract_high_v8bf16>;
defm : ConcatPat<v16i8, v8i8, extract_high_v16i8>;

//----------------------------------------------------------------------------
// AdvSIMD across lanes instructions
Expand Down
82 changes: 24 additions & 58 deletions llvm/test/CodeGen/AArch64/concat-vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -385,19 +385,11 @@ entry:
}

define <8 x i16> @concat_high_high_v8i16(<8 x i16> %a_vec, <8 x i16> %b_vec) {
; CHECK-SD-LABEL: concat_high_high_v8i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: concat_high_high_v8i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d0, v0.d[1]
; CHECK-GI-NEXT: mov d1, v1.d[1]
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ret
; CHECK-LABEL: concat_high_high_v8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v1.d[0], v0.d[1]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: ret
entry:
%shuffle.i3 = shufflevector <8 x i16> %a_vec, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i = shufflevector <8 x i16> %b_vec, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
Expand All @@ -406,19 +398,11 @@ entry:
}

define <8 x half> @concat_high_high_v8f16(<8 x half> %a_vec, <8 x half> %b_vec) {
; CHECK-SD-LABEL: concat_high_high_v8f16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: concat_high_high_v8f16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d0, v0.d[1]
; CHECK-GI-NEXT: mov d1, v1.d[1]
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ret
; CHECK-LABEL: concat_high_high_v8f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v1.d[0], v0.d[1]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: ret
entry:
%shuffle.i3 = shufflevector <8 x half> %a_vec, <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i = shufflevector <8 x half> %b_vec, <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
Expand All @@ -427,19 +411,11 @@ entry:
}

define <8 x bfloat> @concat_high_high_v8bf16(<8 x bfloat> %a_vec, <8 x bfloat> %b_vec) {
; CHECK-SD-LABEL: concat_high_high_v8bf16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: concat_high_high_v8bf16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d0, v0.d[1]
; CHECK-GI-NEXT: mov d1, v1.d[1]
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ret
; CHECK-LABEL: concat_high_high_v8bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v1.d[0], v0.d[1]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: ret
entry:
%shuffle.i3 = shufflevector <8 x bfloat> %a_vec, <8 x bfloat> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i = shufflevector <8 x bfloat> %b_vec, <8 x bfloat> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
Expand All @@ -455,9 +431,8 @@ define <4 x i32> @concat_high_high_v4i32(<4 x i32> %a_vec, <4 x i32> %b_vec) {
;
; CHECK-GI-LABEL: concat_high_high_v4i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d0, v0.d[1]
; CHECK-GI-NEXT: mov d1, v1.d[1]
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: mov v1.d[0], v0.d[1]
; CHECK-GI-NEXT: mov v0.16b, v1.16b
; CHECK-GI-NEXT: ret
entry:
%shuffle.i3 = shufflevector <4 x i32> %a_vec, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
Expand All @@ -474,9 +449,8 @@ define <4 x float> @concat_high_high_v4f32(<4 x float> %a_vec, <4 x float> %b_ve
;
; CHECK-GI-LABEL: concat_high_high_v4f32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d0, v0.d[1]
; CHECK-GI-NEXT: mov d1, v1.d[1]
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: mov v1.d[0], v0.d[1]
; CHECK-GI-NEXT: mov v0.16b, v1.16b
; CHECK-GI-NEXT: ret
entry:
%shuffle.i3 = shufflevector <4 x float> %a_vec, <4 x float> poison, <2 x i32> <i32 2, i32 3>
Expand All @@ -486,19 +460,11 @@ entry:
}

define <16 x i8> @concat_high_high_v16i8(<16 x i8> %a_vec, <16 x i8> %b_vec) {
; CHECK-SD-LABEL: concat_high_high_v16i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: concat_high_high_v16i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d0, v0.d[1]
; CHECK-GI-NEXT: mov d1, v1.d[1]
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ret
; CHECK-LABEL: concat_high_high_v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v1.d[0], v0.d[1]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: ret
entry:
%shuffle.i3 = shufflevector <16 x i8> %a_vec, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%shuffle.i = shufflevector <16 x i8> %b_vec, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AArch64/vecreduce-add.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4885,8 +4885,7 @@ entry:
define i32 @extract_hi_hi(<8 x i16> %a) {
; CHECK-SD-LABEL: extract_hi_hi:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: mov v0.d[1], v0.d[0]
; CHECK-SD-NEXT: mov v0.d[0], v0.d[1]
; CHECK-SD-NEXT: uaddlv s0, v0.8h
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
Expand Down
Loading