Skip to content

Commit 58ee251

Browse files
committed
Try to widen shuffles before generating SVE TBL
1 parent: 9032a45 · commit: 58ee251

File tree

3 files changed: 15 additions and 32 deletions.

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29274,6 +29274,11 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
2927429274
}
2927529275
}
2927629276

29277+
// Try to widen the shuffle before generating a possibly expensive SVE TBL.
29278+
// This may allow the shuffle to be matched as something cheaper like ZIP1.
29279+
if (SDValue WideOp = tryWidenMaskForShuffle(Op, DAG))
29280+
return WideOp;
29281+
2927729282
// Avoid producing TBL instruction if we don't know SVE register minimal size,
2927829283
// unless NEON is not available and we can assume minimal SVE register size is
2927929284
// 128-bits.

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll

Lines changed: 9 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
3-
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SME
2+
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK
3+
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK
44
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
55

66
target triple = "aarch64-unknown-linux-gnu"
@@ -406,33 +406,13 @@ define void @concat_v8i64(ptr %a, ptr %b, ptr %c) {
406406
;
407407

408408
define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2) {
409-
; SVE2-LABEL: concat_v4f16:
410-
; SVE2: // %bb.0:
411-
; SVE2-NEXT: cnth x8
412-
; SVE2-NEXT: adrp x9, .LCPI15_0
413-
; SVE2-NEXT: adrp x10, .LCPI15_1
414-
; SVE2-NEXT: mov z2.h, w8
415-
; SVE2-NEXT: ldr q3, [x9, :lo12:.LCPI15_0]
416-
; SVE2-NEXT: ldr q4, [x10, :lo12:.LCPI15_1]
417-
; SVE2-NEXT: ptrue p0.h, vl8
418-
; SVE2-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
419-
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
420-
; SVE2-NEXT: mad z2.h, p0/m, z3.h, z4.h
421-
; SVE2-NEXT: tbl z0.h, { z0.h, z1.h }, z2.h
422-
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
423-
; SVE2-NEXT: ret
424-
;
425-
; SME-LABEL: concat_v4f16:
426-
; SME: // %bb.0:
427-
; SME-NEXT: // kill: def $d1 killed $d1 def $z1
428-
; SME-NEXT: // kill: def $d0 killed $d0 def $z0
429-
; SME-NEXT: mov z2.h, z1.h[1]
430-
; SME-NEXT: mov z3.h, z0.h[1]
431-
; SME-NEXT: zip1 z1.h, z1.h, z2.h
432-
; SME-NEXT: zip1 z0.h, z0.h, z3.h
433-
; SME-NEXT: zip1 z0.s, z0.s, z1.s
434-
; SME-NEXT: // kill: def $d0 killed $d0 killed $z0
435-
; SME-NEXT: ret
409+
; CHECK-LABEL: concat_v4f16:
410+
; CHECK: // %bb.0:
411+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
412+
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
413+
; CHECK-NEXT: zip1 z0.s, z0.s, z1.s
414+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
415+
; CHECK-NEXT: ret
436416
;
437417
; NONEON-NOSVE-LABEL: concat_v4f16:
438418
; NONEON-NOSVE: // %bb.0:

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -276,10 +276,8 @@ define void @extract_subvector_v4i64(ptr %a, ptr %b) {
276276
define <2 x half> @extract_subvector_v4f16(<4 x half> %op) {
277277
; CHECK-LABEL: extract_subvector_v4f16:
278278
; CHECK: // %bb.0:
279-
; CHECK-NEXT: adrp x8, .LCPI12_0
280279
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
281-
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0]
282-
; CHECK-NEXT: tbl z0.h, { z0.h }, z1.h
280+
; CHECK-NEXT: mov z0.s, z0.s[1]
283281
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
284282
; CHECK-NEXT: ret
285283
;

0 commit comments

Comments
 (0)