Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29274,6 +29274,11 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
}
}

// Try to widen the shuffle before generating a possibly expensive SVE TBL.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would the patterns above benefit from moving this up a bit, so that it's executed earlier in the function?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it changes the result (as all the matching above is looking for single-instruction replacements). Also, I placed it here to be consistent with the non-SVE lowering, which attempts this just before generating the Neon TBL too.

// This may allow the shuffle to be matched as something cheaper like ZIP1.
if (SDValue WideOp = tryWidenMaskForShuffle(Op, DAG))
return WideOp;

// Avoid producing a TBL instruction if we don't know the minimal SVE register
// size, unless NEON is not available and we can assume the minimal SVE
// register size is 128 bits.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SME
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE

target triple = "aarch64-unknown-linux-gnu"
Expand Down Expand Up @@ -406,33 +406,13 @@ define void @concat_v8i64(ptr %a, ptr %b, ptr %c) {
;

define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2) {
; SVE2-LABEL: concat_v4f16:
; SVE2: // %bb.0:
; SVE2-NEXT: cnth x8
; SVE2-NEXT: adrp x9, .LCPI15_0
; SVE2-NEXT: adrp x10, .LCPI15_1
; SVE2-NEXT: mov z2.h, w8
; SVE2-NEXT: ldr q3, [x9, :lo12:.LCPI15_0]
; SVE2-NEXT: ldr q4, [x10, :lo12:.LCPI15_1]
; SVE2-NEXT: ptrue p0.h, vl8
; SVE2-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
; SVE2-NEXT: mad z2.h, p0/m, z3.h, z4.h
; SVE2-NEXT: tbl z0.h, { z0.h, z1.h }, z2.h
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
;
; SME-LABEL: concat_v4f16:
; SME: // %bb.0:
; SME-NEXT: // kill: def $d1 killed $d1 def $z1
; SME-NEXT: // kill: def $d0 killed $d0 def $z0
; SME-NEXT: mov z2.h, z1.h[1]
; SME-NEXT: mov z3.h, z0.h[1]
; SME-NEXT: zip1 z1.h, z1.h, z2.h
; SME-NEXT: zip1 z0.h, z0.h, z3.h
; SME-NEXT: zip1 z0.s, z0.s, z1.s
; SME-NEXT: // kill: def $d0 killed $d0 killed $z0
; SME-NEXT: ret
; CHECK-LABEL: concat_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: zip1 z0.s, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: concat_v4f16:
; NONEON-NOSVE: // %bb.0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,10 +276,8 @@ define void @extract_subvector_v4i64(ptr %a, ptr %b) {
define <2 x half> @extract_subvector_v4f16(<4 x half> %op) {
; CHECK-LABEL: extract_subvector_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI12_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0]
; CHECK-NEXT: tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
Expand Down
Loading