diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index ad1d1237aa25a..23d3985f9fc9e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -29274,6 +29274,11 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE( } } + // Try to widen the shuffle before generating a possibly expensive SVE TBL. + // This may allow the shuffle to be matched as something cheaper like ZIP1. + if (SDValue WideOp = tryWidenMaskForShuffle(Op, DAG)) + return WideOp; + // Avoid producing TBL instruction if we don't know SVE register minimal size, // unless NEON is not available and we can assume minimal SVE register size is // 128-bits. diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll index 6e2ecfca9e963..619840fc6afb2 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 -; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SME +; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -406,33 +406,13 @@ define void @concat_v8i64(ptr %a, ptr %b, ptr %c) { ; define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2) { -; SVE2-LABEL: concat_v4f16: -; SVE2: // %bb.0: -; SVE2-NEXT: cnth x8 -; SVE2-NEXT: adrp x9, .LCPI15_0 -; SVE2-NEXT: adrp x10, .LCPI15_1 -; SVE2-NEXT: mov z2.h, w8 -; SVE2-NEXT: ldr q3, [x9, :lo12:.LCPI15_0] -; SVE2-NEXT: ldr q4, [x10, :lo12:.LCPI15_1] -; SVE2-NEXT: ptrue p0.h, vl8 -; SVE2-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1 -; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1 -; SVE2-NEXT: mad z2.h, p0/m, z3.h, z4.h -; SVE2-NEXT: tbl z0.h, { z0.h, z1.h }, z2.h -; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 -; SVE2-NEXT: ret -; -; SME-LABEL: concat_v4f16: -; SME: // %bb.0: -; SME-NEXT: // kill: def $d1 killed $d1 def $z1 -; SME-NEXT: // kill: def $d0 killed $d0 def $z0 -; SME-NEXT: mov z2.h, z1.h[1] -; SME-NEXT: mov z3.h, z0.h[1] -; SME-NEXT: zip1 z1.h, z1.h, z2.h -; SME-NEXT: zip1 z0.h, z0.h, z3.h -; SME-NEXT: zip1 z0.s, z0.s, z1.s -; SME-NEXT: // kill: def $d0 killed $d0 killed $z0 -; SME-NEXT: ret +; CHECK-LABEL: concat_v4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: zip1 z0.s, z0.s, z1.s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: concat_v4f16: ; NONEON-NOSVE: // %bb.0: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll index a728cbe97056d..35dd827bbabc5 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll @@ -276,10 +276,8 @@ define void @extract_subvector_v4i64(ptr %a, ptr %b) { define <2 x half> @extract_subvector_v4f16(<4 x half> %op) { ; CHECK-LABEL: extract_subvector_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI12_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0] -; CHECK-NEXT: tbl z0.h, { z0.h }, z1.h +; CHECK-NEXT: mov z0.s, z0.s[1] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ;