diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index a8ba89f784c8c..56ff7b0d3a280 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -145,8 +145,12 @@ def gi_extract_high_v4i32 : def extract_high_v8f16 : ComplexPattern; +def extract_high_v8bf16 : + ComplexPattern; def extract_high_v4f32 : ComplexPattern; +def extract_high_v2f64 : + ComplexPattern; def gi_extract_high_v8f16 : GIComplexOperandMatcher, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 7614f6215b803..d015cc15581ad 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -7352,7 +7352,8 @@ def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx), // All concat_vectors operations are canonicalised to act on i64 vectors for // AArch64. In the general case we need an instruction, which had just as well be // INS. -multiclass ConcatPat { +multiclass ConcatPat { def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)), (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1, (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>; @@ -7365,16 +7366,22 @@ multiclass ConcatPat { // If the high lanes are undef we can just ignore them: def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)), (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>; -} -defm : ConcatPat; -defm : ConcatPat; -defm : ConcatPat; -defm : ConcatPat; -defm : ConcatPat; -defm : ConcatPat; -defm : ConcatPat; -defm : ConcatPat; + // Concatenating the high halves of two vectors is an insert of the first + // vector's high half into the low half of the second vector. 
+ def : Pat<(DstTy (concat_vectors (ExtractHigh (DstTy V128:$Rn)), + (ExtractHigh (DstTy V128:$Rm)))), + (INSvi64lane V128:$Rm, (i64 0), V128:$Rn, (i64 1))>; +} + +defm : ConcatPat; +defm : ConcatPat; +defm : ConcatPat; +defm : ConcatPat; +defm : ConcatPat; +defm : ConcatPat; +defm : ConcatPat; +defm : ConcatPat; //---------------------------------------------------------------------------- // AdvSIMD across lanes instructions diff --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll index 36583b89ce5fc..0daa6e7f16202 100644 --- a/llvm/test/CodeGen/AArch64/concat-vector.ll +++ b/llvm/test/CodeGen/AArch64/concat-vector.ll @@ -385,19 +385,11 @@ entry: } define <8 x i16> @concat_high_high_v8i16(<8 x i16> %a_vec, <8 x i16> %b_vec) { -; CHECK-SD-LABEL: concat_high_high_v8i16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: concat_high_high_v8i16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d0, v0.d[1] -; CHECK-GI-NEXT: mov d1, v1.d[1] -; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: concat_high_high_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov v1.d[0], v0.d[1] +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: ret entry: %shuffle.i3 = shufflevector <8 x i16> %a_vec, <8 x i16> poison, <4 x i32> %shuffle.i = shufflevector <8 x i16> %b_vec, <8 x i16> poison, <4 x i32> @@ -406,19 +398,11 @@ entry: } define <8 x half> @concat_high_high_v8f16(<8 x half> %a_vec, <8 x half> %b_vec) { -; CHECK-SD-LABEL: concat_high_high_v8f16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: concat_high_high_v8f16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d0, v0.d[1] 
-; CHECK-GI-NEXT: mov d1, v1.d[1] -; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: concat_high_high_v8f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov v1.d[0], v0.d[1] +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: ret entry: %shuffle.i3 = shufflevector <8 x half> %a_vec, <8 x half> poison, <4 x i32> %shuffle.i = shufflevector <8 x half> %b_vec, <8 x half> poison, <4 x i32> @@ -427,19 +411,11 @@ entry: } define <8 x bfloat> @concat_high_high_v8bf16(<8 x bfloat> %a_vec, <8 x bfloat> %b_vec) { -; CHECK-SD-LABEL: concat_high_high_v8bf16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: concat_high_high_v8bf16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d0, v0.d[1] -; CHECK-GI-NEXT: mov d1, v1.d[1] -; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: concat_high_high_v8bf16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov v1.d[0], v0.d[1] +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: ret entry: %shuffle.i3 = shufflevector <8 x bfloat> %a_vec, <8 x bfloat> poison, <4 x i32> %shuffle.i = shufflevector <8 x bfloat> %b_vec, <8 x bfloat> poison, <4 x i32> @@ -455,9 +431,8 @@ define <4 x i32> @concat_high_high_v4i32(<4 x i32> %a_vec, <4 x i32> %b_vec) { ; ; CHECK-GI-LABEL: concat_high_high_v4i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d0, v0.d[1] -; CHECK-GI-NEXT: mov d1, v1.d[1] -; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: mov v1.d[0], v0.d[1] +; CHECK-GI-NEXT: mov v0.16b, v1.16b ; CHECK-GI-NEXT: ret entry: %shuffle.i3 = shufflevector <4 x i32> %a_vec, <4 x i32> poison, <2 x i32> @@ -474,9 +449,8 @@ define <4 x float> @concat_high_high_v4f32(<4 x float> %a_vec, <4 x float> %b_ve ; ; CHECK-GI-LABEL: concat_high_high_v4f32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d0, v0.d[1] -; CHECK-GI-NEXT: mov d1, 
v1.d[1] -; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: mov v1.d[0], v0.d[1] +; CHECK-GI-NEXT: mov v0.16b, v1.16b ; CHECK-GI-NEXT: ret entry: %shuffle.i3 = shufflevector <4 x float> %a_vec, <4 x float> poison, <2 x i32> @@ -486,19 +460,11 @@ entry: } define <16 x i8> @concat_high_high_v16i8(<16 x i8> %a_vec, <16 x i8> %b_vec) { -; CHECK-SD-LABEL: concat_high_high_v16i8: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: concat_high_high_v16i8: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d0, v0.d[1] -; CHECK-GI-NEXT: mov d1, v1.d[1] -; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: concat_high_high_v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov v1.d[0], v0.d[1] +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: ret entry: %shuffle.i3 = shufflevector <16 x i8> %a_vec, <16 x i8> poison, <8 x i32> %shuffle.i = shufflevector <16 x i8> %b_vec, <16 x i8> poison, <8 x i32> diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll index 184aa0226fe77..8473f45f6c803 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll @@ -4885,8 +4885,7 @@ entry: define i32 @extract_hi_hi(<8 x i16> %a) { ; CHECK-SD-LABEL: extract_hi_hi: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: mov v0.d[1], v0.d[0] +; CHECK-SD-NEXT: mov v0.d[0], v0.d[1] ; CHECK-SD-NEXT: uaddlv s0, v0.8h ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ret