diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 49e9d6bd73a4c..de03fcc365785 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -874,6 +874,13 @@ class CombinerHelper {
   /// Remove references to rhs if it is undef
   bool matchShuffleUndefRHS(MachineInstr &MI, BuildFnTy &MatchInfo);
 
+  /// Replace the source of a G_SHUFFLE_VECTOR that the mask never references
+  /// with undef. The referenced source always ends up as the first operand:
+  ///   shuffle a, b, mask -> shuffle a, undef, mask   (mask only references a)
+  ///   shuffle a, b, mask -> shuffle b, undef, mask'  (mask only references b,
+  ///                                                   mask' = commuted mask)
+  bool matchShuffleDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo);
+
   /// Use a function which takes in a MachineIRBuilder to perform a combine.
   /// By default, it erases the instruction def'd on \p MO from the function.
   void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 95f3d637da854..8fe7f0f5907c6 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1594,6 +1594,13 @@ def combine_shuffle_undef_rhs : GICombineRule<
   (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
 >;
 
+def combine_shuffle_disjoint_mask : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+        [{ return Helper.matchShuffleDisjointMask(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
+>;
+
 // match_extract_of_element and insert_vector_elt_oob must be the first!
 def vector_ops_combines: GICombineGroup<[
   match_extract_of_element_undef_vector,
@@ -1945,7 +1952,8 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
 def prefer_sign_combines : GICombineGroup<[nneg_zext]>;
 
 def shuffle_combines : GICombineGroup<[combine_shuffle_concat,
-                                       combine_shuffle_undef_rhs]>;
+                                       combine_shuffle_undef_rhs,
+                                       combine_shuffle_disjoint_mask]>;
 
 def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
     vector_ops_combines, freeze_combines, cast_combines,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 1a1a1c28ef150..83d78c0bde399 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -7724,3 +7724,76 @@ bool CombinerHelper::matchShuffleUndefRHS(MachineInstr &MI,
 
   return true;
 }
+
+/// Rewrite \p Mask in place so that it is valid after swapping the two source
+/// operands of a shuffle: indices selecting from the first source (< \p
+/// NumElems) are moved to the second source and vice versa. Negative (undef)
+/// indices are left untouched.
+static void commuteMask(MutableArrayRef<int> Mask, const unsigned NumElems) {
+  for (int &Idx : Mask) {
+    if (Idx < 0)
+      continue;
+
+    if (Idx < static_cast<int>(NumElems))
+      Idx += NumElems;
+    else
+      Idx -= NumElems;
+  }
+}
+
+bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI,
+                                              BuildFnTy &MatchInfo) {
+  auto &Shuffle = cast<GShuffleVector>(MI);
+
+  // If either input is already undef, don't check the mask again; otherwise
+  // the rewritten shuffle would be matched over and over (infinite loop).
+  if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI))
+    return false;
+
+  if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI))
+    return false;
+
+  const LLT DstTy = MRI.getType(Shuffle.getReg(0));
+  const LLT Src1Ty = MRI.getType(Shuffle.getSrc1Reg());
+  if (!isLegalOrBeforeLegalizer(
+          {TargetOpcode::G_SHUFFLE_VECTOR, {DstTy, Src1Ty}}))
+    return false;
+
+  ArrayRef<int> Mask = Shuffle.getMask();
+  // A scalar source is treated as a one-element vector.
+  const unsigned NumSrcElems = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
+
+  // Record which of the two sources the non-undef mask elements select from.
+  bool TouchesSrc1 = false;
+  bool TouchesSrc2 = false;
+  for (const int Idx : Mask) {
+    if (Idx < 0)
+      continue;
+
+    if (Idx < static_cast<int>(NumSrcElems))
+      TouchesSrc1 = true;
+    else
+      TouchesSrc2 = true;
+  }
+
+  // Only rewrite when exactly one source is referenced. An all-undef mask
+  // (neither touched) and a mask using both sources are left alone.
+  if (TouchesSrc1 == TouchesSrc2)
+    return false;
+
+  // The referenced source becomes the first operand; if that is the old
+  // second operand, the mask indices have to be commuted accordingly.
+  Register NewSrc1 = Shuffle.getSrc1Reg();
+  SmallVector<int, 16> NewMask(Mask);
+  if (TouchesSrc2) {
+    NewSrc1 = Shuffle.getSrc2Reg();
+    commuteMask(NewMask, NumSrcElems);
+  }
+
+  MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
+    auto Undef = B.buildUndef(Src1Ty);
+    B.buildShuffleVector(Shuffle.getReg(0), NewSrc1, Undef, NewMask);
+  };
+
+  return true;
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
index 83b5c388520eb..e2933690c7c55 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
@@ -598,9 +598,9 @@ body: |
     ; CHECK: liveins: $x0, $x1
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: %arg1:_(<4 x s32>) = COPY $q0
-    ; CHECK-NEXT: %arg2:_(<4 x s32>) = COPY $q1
     ; CHECK-NEXT: %idx:_(s64) = COPY $x1
-    ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2, shufflemask(undef, 0, 0, 0)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), [[DEF]], shufflemask(undef, 0, 0, 0)
     ; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %idx(s64)
     ; CHECK-NEXT: $w0 = COPY %extract(s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
index fa725ad0c5fb4..6b84a8488e478 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
@@ -1395,9 +1395,9 @@ body: |
     ; 
CHECK: liveins: $x0, $x1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %arg1:_(<4 x s32>) = COPY $q0 - ; CHECK-NEXT: %arg2:_(<4 x s32>) = COPY $q1 ; CHECK-NEXT: %idx:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2, shufflemask(3, 0, 0, 0) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), [[DEF]], shufflemask(3, 0, 0, 0) ; CHECK-NEXT: %freeze_sv:_(<4 x s32>) = G_FREEZE %sv ; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %freeze_sv(<4 x s32>), %idx(s64) ; CHECK-NEXT: $w0 = COPY %extract(s32) @@ -1422,9 +1422,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %elt:_(s32) = COPY $w0 ; CHECK-NEXT: %arg1:_(<4 x s32>) = COPY $q0 - ; CHECK-NEXT: %arg2:_(<4 x s32>) = COPY $q1 ; CHECK-NEXT: %idx:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2, shufflemask(3, 0, 0, 0) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), [[DEF]], shufflemask(3, 0, 0, 0) ; CHECK-NEXT: %freeze_sv:_(<4 x s32>) = G_FREEZE %sv ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE %elt ; CHECK-NEXT: %extract:_(<4 x s32>) = G_INSERT_VECTOR_ELT %freeze_sv, [[FREEZE]](s32), %idx(s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir new file mode 100644 index 0000000000000..9261d7af41c69 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir @@ -0,0 +1,101 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64-apple-ios -run-pass=aarch64-prelegalizer-combiner %s -o - | FileCheck %s + +--- +name: shuffle_vector_unused_lhs +tracksRegLiveness: true +body: | + bb.1: + liveins: $d0, $d1 + + ; 
CHECK-LABEL: name: shuffle_vector_unused_lhs + ; CHECK: liveins: $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(1, 0, 1, 0) + ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>) + %0:_(<2 x s32>) = COPY $d0 + %1:_(<2 x s32>) = COPY $d1 + %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1(<2 x s32>), shufflemask(3, 2, 3, 2) + RET_ReallyLR implicit %2 +... + +--- +name: shuffle_vector_unused_rhs +tracksRegLiveness: true +body: | + bb.1: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: shuffle_vector_unused_rhs + ; CHECK: liveins: $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(0, 0, 1, 1) + ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>) + %0:_(<2 x s32>) = COPY $d0 + %1:_(<2 x s32>) = COPY $d1 + %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0,0,1,1) + RET_ReallyLR implicit %2 +... + +--- +name: shuffle_vector_both_used +tracksRegLiveness: true +body: | + bb.1: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: shuffle_vector_both_used + ; CHECK: liveins: $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(0, 2, 1, 3) + ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>) + %0:_(<2 x s32>) = COPY $d0 + %1:_(<2 x s32>) = COPY $d1 + %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0,2,1,3) + RET_ReallyLR implicit %2 +... 
+ +--- +name: shuffle_vector_undef_elems +tracksRegLiveness: true +body: | + bb.1: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: shuffle_vector_undef_elems + ; CHECK: liveins: $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(undef, 0, 1, undef) + ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>) + %0:_(<2 x s32>) = COPY $d0 + %1:_(<2 x s32>) = COPY $d1 + %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(-1,0,1,-1) + RET_ReallyLR implicit %2 +... + +--- +name: shuffle_vector_scalar +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: shuffle_vector_scalar + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY]](s64), [[COPY]](s64), [[COPY]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(<4 x s64>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0, 0, 0) + RET_ReallyLR implicit %2 +... 
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll index 2a2f304b23e9b..0c7a61739695f 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll @@ -201,21 +201,21 @@ define void @matrix_mul_double_shuffle(i32 %N, ptr nocapture %C, ptr nocapture r ; CHECK-GI: // %bb.0: // %vector.header ; CHECK-GI-NEXT: and w9, w3, #0xffff ; CHECK-GI-NEXT: adrp x8, .LCPI2_0 -; CHECK-GI-NEXT: dup v1.4s, w9 +; CHECK-GI-NEXT: dup v0.4s, w9 ; CHECK-GI-NEXT: mov w9, w0 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI2_0] +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] ; CHECK-GI-NEXT: and x8, x9, #0xfffffff8 ; CHECK-GI-NEXT: .LBB2_1: // %vector.body ; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-GI-NEXT: ldrh w9, [x2], #16 ; CHECK-GI-NEXT: subs x8, x8, #8 -; CHECK-GI-NEXT: mov v0.s[0], w9 +; CHECK-GI-NEXT: mov v2.s[0], w9 ; CHECK-GI-NEXT: mov w9, w0 ; CHECK-GI-NEXT: add w0, w0, #8 ; CHECK-GI-NEXT: lsl x9, x9, #2 -; CHECK-GI-NEXT: tbl v3.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-GI-NEXT: mul v3.4s, v1.4s, v3.4s -; CHECK-GI-NEXT: str q3, [x1, x9] +; CHECK-GI-NEXT: tbl v2.16b, { v2.16b, v3.16b }, v1.16b +; CHECK-GI-NEXT: mul v2.4s, v0.4s, v2.4s +; CHECK-GI-NEXT: str q2, [x1, x9] ; CHECK-GI-NEXT: b.ne .LBB2_1 ; CHECK-GI-NEXT: // %bb.2: // %for.end12 ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll index 3c4901ade972e..69a69dbd3b18b 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll @@ -2385,9 +2385,8 @@ define <2 x i32> @do_stuff(<2 x i64> %0, <2 x i64> %1) { ; ; CHECK-GI-LABEL: do_stuff: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: movi v2.2d, #0000000000000000 ; CHECK-GI-NEXT: xtn v0.2s, v0.2d -; CHECK-GI-NEXT: ext v2.16b, v1.16b, v2.16b, #8 +; CHECK-GI-NEXT: mov d2, v1.d[1] ; CHECK-GI-NEXT: umull v0.2d, v2.2s, v0.2s 
; CHECK-GI-NEXT: xtn v0.2s, v0.2d ; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s diff --git a/llvm/test/CodeGen/AArch64/arm64-ext.ll b/llvm/test/CodeGen/AArch64/arm64-ext.ll index 932b94a91095a..e32d83327fe42 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ext.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ext.ll @@ -70,15 +70,10 @@ define <8 x i8> @test_vextd_undef(<8 x i8> %tmp1, <8 x i8> %tmp2) { } define <8 x i8> @test_vextd_undef2(<8 x i8> %tmp1, <8 x i8> %tmp2) { -; CHECK-SD-LABEL: test_vextd_undef2: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ext v0.8b, v0.8b, v0.8b, #6 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vextd_undef2: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ext v0.8b, v1.8b, v0.8b, #6 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vextd_undef2: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v0.8b, v0.8b, v0.8b, #6 +; CHECK-NEXT: ret %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> ret <8 x i8> %tmp3 } diff --git a/llvm/test/CodeGen/AArch64/arm64-extract_subvector.ll b/llvm/test/CodeGen/AArch64/arm64-extract_subvector.ll index 02e6f28ee6ff9..f08ff2ccf693d 100644 --- a/llvm/test/CodeGen/AArch64/arm64-extract_subvector.ll +++ b/llvm/test/CodeGen/AArch64/arm64-extract_subvector.ll @@ -1,35 +1,50 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefix=CHECK-SD +; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -aarch64-neon-syntax=apple | FileCheck %s --check-prefix=CHECK-GI ; Extract of an upper half of a vector is an "ext.16b v0, v0, v0, #8" insn. 
define <8 x i8> @v8i8(<16 x i8> %a) nounwind { -; CHECK-LABEL: v8i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ext.16b v0, v0, v0, #8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ext.16b v0, v0, v0, #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d0, v0[1] +; CHECK-GI-NEXT: ret %ret = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> ret <8 x i8> %ret } define <4 x i16> @v4i16(<8 x i16> %a) nounwind { -; CHECK-LABEL: v4i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ext.16b v0, v0, v0, #8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ext.16b v0, v0, v0, #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d0, v0[1] +; CHECK-GI-NEXT: ret %ret = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> ret <4 x i16> %ret } define <2 x i32> @v2i32(<4 x i32> %a) nounwind { -; CHECK-LABEL: v2i32: -; CHECK: // %bb.0: -; CHECK-NEXT: ext.16b v0, v0, v0, #8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ext.16b v0, v0, v0, #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d0, v0[1] +; CHECK-GI-NEXT: ret %ret = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> ret <2 x i32> %ret } @@ -65,11 +80,16 @@ define <1 x ptr> @v1p0(<2 x ptr> %a) nounwind { } define <2 x float> @v2f32(<4 x float> %a) nounwind { -; CHECK-LABEL: v2f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ext.16b v0, v0, v0, #8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2f32: +; CHECK-SD: // %bb.0: +; 
CHECK-SD-NEXT: ext.16b v0, v0, v0, #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2f32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d0, v0[1] +; CHECK-GI-NEXT: ret %ret = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> ret <2 x float> %ret } diff --git a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll index 2c5d33da93c86..177f2cafcf833 100644 --- a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll +++ b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll @@ -17,17 +17,11 @@ entry: } define <8 x i8> @i8_off1(<16 x i8> %arg1, <16 x i8> %arg2) { -; CHECK-SD-LABEL: i8_off1: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #1 -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SD-NEXT: ret -; -; CHECK-GISEL-LABEL: i8_off1: -; CHECK-GISEL: // %bb.0: // %entry -; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #1 -; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: i8_off1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #1 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret entry: %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> ret <8 x i8> %shuffle @@ -42,8 +36,7 @@ define <8 x i8> @i8_off8(<16 x i8> %arg1, <16 x i8> %arg2) { ; ; CHECK-GISEL-LABEL: i8_off8: ; CHECK-GISEL: // %bb.0: // %entry -; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #8 -; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GISEL-NEXT: mov d0, v0.d[1] ; CHECK-GISEL-NEXT: ret entry: %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> @@ -90,17 +83,11 @@ entry: } define <4 x i16> @i16_off1(<8 x i16> %arg1, <8 x i16> %arg2) { -; CHECK-SD-LABEL: i16_off1: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #2 -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SD-NEXT: ret -; 
-; CHECK-GISEL-LABEL: i16_off1: -; CHECK-GISEL: // %bb.0: // %entry -; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #2 -; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: i16_off1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #2 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> ret <4 x i16> %shuffle @@ -140,17 +127,11 @@ entry: } define <2 x i32> @i32_off1(<4 x i32> %arg1, <4 x i32> %arg2) { -; CHECK-SD-LABEL: i32_off1: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #4 -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SD-NEXT: ret -; -; CHECK-GISEL-LABEL: i32_off1: -; CHECK-GISEL: // %bb.0: // %entry -; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #4 -; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: i32_off1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #4 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> ret <2 x i32> %shuffle @@ -228,18 +209,11 @@ entry: } define <8 x i8> @i8_zero_off1(<16 x i8> %arg1) { -; CHECK-SD-LABEL: i8_zero_off1: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #1 -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SD-NEXT: ret -; -; CHECK-GISEL-LABEL: i8_zero_off1: -; CHECK-GISEL: // %bb.0: // %entry -; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000 -; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #1 -; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: i8_zero_off1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #1 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret entry: %shuffle = shufflevector <16 x 
i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> ret <8 x i8> %shuffle @@ -254,9 +228,7 @@ define <8 x i8> @i8_zero_off8(<16 x i8> %arg1) { ; ; CHECK-GISEL-LABEL: i8_zero_off8: ; CHECK-GISEL: // %bb.0: // %entry -; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000 -; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #8 -; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GISEL-NEXT: mov d0, v0.d[1] ; CHECK-GISEL-NEXT: ret entry: %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> @@ -283,8 +255,8 @@ define <8 x i8> @i8_zero_off22(<16 x i8> %arg1) { ; ; CHECK-GISEL-LABEL: i8_zero_off22: ; CHECK-GISEL: // %bb.0: // %entry -; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000 -; CHECK-GISEL-NEXT: ext v0.16b, v1.16b, v0.16b, #6 +; CHECK-GISEL-NEXT: movi v0.2d, #0000000000000000 +; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v0.16b, #6 ; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GISEL-NEXT: ret entry: @@ -304,18 +276,11 @@ entry: } define <4 x i16> @i16_zero_off1(<8 x i16> %arg1) { -; CHECK-SD-LABEL: i16_zero_off1: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #2 -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SD-NEXT: ret -; -; CHECK-GISEL-LABEL: i16_zero_off1: -; CHECK-GISEL: // %bb.0: // %entry -; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000 -; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #2 -; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: i16_zero_off1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #2 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> ret <4 x i16> %shuffle @@ -355,18 +320,11 @@ entry: } define <2 x i32> @i32_zero_off1(<4 x i32> %arg1) { -; CHECK-SD-LABEL: i32_zero_off1: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #4 -; 
CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SD-NEXT: ret -; -; CHECK-GISEL-LABEL: i32_zero_off1: -; CHECK-GISEL: // %bb.0: // %entry -; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000 -; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #4 -; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: i32_zero_off1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #4 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> ret <2 x i32> %shuffle