From 09153dbab44a45749088b748905270cf6423321e Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Tue, 17 Jun 2025 13:27:15 +0000 Subject: [PATCH 1/2] Add RUN line to show failure to match constant based splats. --- .../complex-deinterleaving-splat-scalable.ll | 66 ++++++++++++------- 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll index 6615313613153..d38cbc8c99c11 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll @@ -1,32 +1,54 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s --mattr=+sve -o - | FileCheck %s +; RUN: llc < %s --mattr=+sve -o - | FileCheck %s -check-prefixes=CHECK,SHUFFLE_SPLAT +; RUN: llc -use-constant-int-for-scalable-splat -use-constant-fp-for-scalable-splat < %s --mattr=+sve -o - | FileCheck %s -check-prefixes=CHECK,CONST_SPLAT target triple = "aarch64" ; a[i] * b[i] * (11.0 + 3.0.i); ; define @complex_mul_const( %a, %b) { -; CHECK-LABEL: complex_mul_const: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v4.2d, #0000000000000000 -; CHECK-NEXT: movi v5.2d, #0000000000000000 -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: fmov z6.d, #3.00000000 -; CHECK-NEXT: fmov z7.d, #11.00000000 -; CHECK-NEXT: fcmla z4.d, p0/m, z0.d, z2.d, #0 -; CHECK-NEXT: fcmla z5.d, p0/m, z1.d, z3.d, #0 -; CHECK-NEXT: fcmla z4.d, p0/m, z0.d, z2.d, #90 -; CHECK-NEXT: movi v2.2d, #0000000000000000 -; CHECK-NEXT: fcmla z5.d, p0/m, z1.d, z3.d, #90 -; CHECK-NEXT: zip2 z1.d, z7.d, z6.d -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: zip1 z3.d, z7.d, z6.d -; CHECK-NEXT: fcmla z2.d, p0/m, z5.d, z1.d, #0 -; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z3.d, #0 -; CHECK-NEXT: fcmla z2.d, p0/m, z5.d, z1.d, #90 -; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z3.d, #90 -; CHECK-NEXT: mov z1.d, z2.d -; 
CHECK-NEXT: ret +; SHUFFLE_SPLAT-LABEL: complex_mul_const: +; SHUFFLE_SPLAT: // %bb.0: // %entry +; SHUFFLE_SPLAT-NEXT: movi v4.2d, #0000000000000000 +; SHUFFLE_SPLAT-NEXT: movi v5.2d, #0000000000000000 +; SHUFFLE_SPLAT-NEXT: ptrue p0.d +; SHUFFLE_SPLAT-NEXT: fmov z6.d, #3.00000000 +; SHUFFLE_SPLAT-NEXT: fmov z7.d, #11.00000000 +; SHUFFLE_SPLAT-NEXT: fcmla z4.d, p0/m, z0.d, z2.d, #0 +; SHUFFLE_SPLAT-NEXT: fcmla z5.d, p0/m, z1.d, z3.d, #0 +; SHUFFLE_SPLAT-NEXT: fcmla z4.d, p0/m, z0.d, z2.d, #90 +; SHUFFLE_SPLAT-NEXT: movi v2.2d, #0000000000000000 +; SHUFFLE_SPLAT-NEXT: fcmla z5.d, p0/m, z1.d, z3.d, #90 +; SHUFFLE_SPLAT-NEXT: zip2 z1.d, z7.d, z6.d +; SHUFFLE_SPLAT-NEXT: movi v0.2d, #0000000000000000 +; SHUFFLE_SPLAT-NEXT: zip1 z3.d, z7.d, z6.d +; SHUFFLE_SPLAT-NEXT: fcmla z2.d, p0/m, z5.d, z1.d, #0 +; SHUFFLE_SPLAT-NEXT: fcmla z0.d, p0/m, z4.d, z3.d, #0 +; SHUFFLE_SPLAT-NEXT: fcmla z2.d, p0/m, z5.d, z1.d, #90 +; SHUFFLE_SPLAT-NEXT: fcmla z0.d, p0/m, z4.d, z3.d, #90 +; SHUFFLE_SPLAT-NEXT: mov z1.d, z2.d +; SHUFFLE_SPLAT-NEXT: ret +; +; CONST_SPLAT-LABEL: complex_mul_const: +; CONST_SPLAT: // %bb.0: // %entry +; CONST_SPLAT-NEXT: uzp1 z4.d, z2.d, z3.d +; CONST_SPLAT-NEXT: uzp2 z5.d, z0.d, z1.d +; CONST_SPLAT-NEXT: uzp2 z2.d, z2.d, z3.d +; CONST_SPLAT-NEXT: uzp1 z0.d, z0.d, z1.d +; CONST_SPLAT-NEXT: ptrue p0.d +; CONST_SPLAT-NEXT: fmul z3.d, z4.d, z5.d +; CONST_SPLAT-NEXT: fmul z1.d, z2.d, z5.d +; CONST_SPLAT-NEXT: fmov z5.d, #11.00000000 +; CONST_SPLAT-NEXT: fmad z2.d, p0/m, z0.d, z3.d +; CONST_SPLAT-NEXT: fmov z3.d, #3.00000000 +; CONST_SPLAT-NEXT: fnmsb z0.d, p0/m, z4.d, z1.d +; CONST_SPLAT-NEXT: fmul z1.d, z2.d, z5.d +; CONST_SPLAT-NEXT: fmul z2.d, z2.d, z3.d +; CONST_SPLAT-NEXT: fmla z1.d, p0/m, z0.d, z3.d +; CONST_SPLAT-NEXT: fnmls z2.d, p0/m, z0.d, z5.d +; CONST_SPLAT-NEXT: zip1 z0.d, z2.d, z1.d +; CONST_SPLAT-NEXT: zip2 z1.d, z2.d, z1.d +; CONST_SPLAT-NEXT: ret entry: %strided.vec = tail call { , } @llvm.vector.deinterleave2.nxv4f64( %a) %0 = extractvalue { , } 
%strided.vec, 0 From edd876d660d6915688e8f856a705330edff6da70 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Tue, 17 Jun 2025 11:41:01 +0000 Subject: [PATCH 2/2] [LLVM][ComplexDeinterleaving] Update splat identification to include vector ConstantInt/FP. --- .../lib/CodeGen/ComplexDeinterleavingPass.cpp | 3 + .../complex-deinterleaving-splat-scalable.ll | 67 +++++++------------ 2 files changed, 26 insertions(+), 44 deletions(-) diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp index ae12423d827d5..8855740f0cc8f 100644 --- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp +++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp @@ -2005,6 +2005,9 @@ ComplexDeinterleavingGraph::identifySplat(Value *R, Value *I) { if (isa<ConstantDataVector>(V)) return true; + if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) + return isa<VectorType>(V->getType()); + VectorType *VTy; ArrayRef<int> Mask; // Splats are represented differently depending on whether the repeated diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll index d38cbc8c99c11..e7a00fc90e31d 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll @@ -1,54 +1,33 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s --mattr=+sve -o - | FileCheck %s -check-prefixes=CHECK,SHUFFLE_SPLAT -; RUN: llc -use-constant-int-for-scalable-splat -use-constant-fp-for-scalable-splat < %s --mattr=+sve -o - | FileCheck %s -check-prefixes=CHECK,CONST_SPLAT +; RUN: llc < %s --mattr=+sve -o - | FileCheck %s +; RUN: llc -use-constant-int-for-scalable-splat -use-constant-fp-for-scalable-splat < %s --mattr=+sve -o - | FileCheck %s target triple = "aarch64" ; a[i] * b[i] * (11.0 + 3.0.i); ; define <vscale x 4 x double> @complex_mul_const(<vscale x 4 x double> %a, <vscale x 4 x double> %b) { -; SHUFFLE_SPLAT-LABEL: complex_mul_const: -; SHUFFLE_SPLAT: // %bb.0: // %entry -;
SHUFFLE_SPLAT-NEXT: movi v4.2d, #0000000000000000 -; SHUFFLE_SPLAT-NEXT: movi v5.2d, #0000000000000000 -; SHUFFLE_SPLAT-NEXT: ptrue p0.d -; SHUFFLE_SPLAT-NEXT: fmov z6.d, #3.00000000 -; SHUFFLE_SPLAT-NEXT: fmov z7.d, #11.00000000 -; SHUFFLE_SPLAT-NEXT: fcmla z4.d, p0/m, z0.d, z2.d, #0 -; SHUFFLE_SPLAT-NEXT: fcmla z5.d, p0/m, z1.d, z3.d, #0 -; SHUFFLE_SPLAT-NEXT: fcmla z4.d, p0/m, z0.d, z2.d, #90 -; SHUFFLE_SPLAT-NEXT: movi v2.2d, #0000000000000000 -; SHUFFLE_SPLAT-NEXT: fcmla z5.d, p0/m, z1.d, z3.d, #90 -; SHUFFLE_SPLAT-NEXT: zip2 z1.d, z7.d, z6.d -; SHUFFLE_SPLAT-NEXT: movi v0.2d, #0000000000000000 -; SHUFFLE_SPLAT-NEXT: zip1 z3.d, z7.d, z6.d -; SHUFFLE_SPLAT-NEXT: fcmla z2.d, p0/m, z5.d, z1.d, #0 -; SHUFFLE_SPLAT-NEXT: fcmla z0.d, p0/m, z4.d, z3.d, #0 -; SHUFFLE_SPLAT-NEXT: fcmla z2.d, p0/m, z5.d, z1.d, #90 -; SHUFFLE_SPLAT-NEXT: fcmla z0.d, p0/m, z4.d, z3.d, #90 -; SHUFFLE_SPLAT-NEXT: mov z1.d, z2.d -; SHUFFLE_SPLAT-NEXT: ret -; -; CONST_SPLAT-LABEL: complex_mul_const: -; CONST_SPLAT: // %bb.0: // %entry -; CONST_SPLAT-NEXT: uzp1 z4.d, z2.d, z3.d -; CONST_SPLAT-NEXT: uzp2 z5.d, z0.d, z1.d -; CONST_SPLAT-NEXT: uzp2 z2.d, z2.d, z3.d -; CONST_SPLAT-NEXT: uzp1 z0.d, z0.d, z1.d -; CONST_SPLAT-NEXT: ptrue p0.d -; CONST_SPLAT-NEXT: fmul z3.d, z4.d, z5.d -; CONST_SPLAT-NEXT: fmul z1.d, z2.d, z5.d -; CONST_SPLAT-NEXT: fmov z5.d, #11.00000000 -; CONST_SPLAT-NEXT: fmad z2.d, p0/m, z0.d, z3.d -; CONST_SPLAT-NEXT: fmov z3.d, #3.00000000 -; CONST_SPLAT-NEXT: fnmsb z0.d, p0/m, z4.d, z1.d -; CONST_SPLAT-NEXT: fmul z1.d, z2.d, z5.d -; CONST_SPLAT-NEXT: fmul z2.d, z2.d, z3.d -; CONST_SPLAT-NEXT: fmla z1.d, p0/m, z0.d, z3.d -; CONST_SPLAT-NEXT: fnmls z2.d, p0/m, z0.d, z5.d -; CONST_SPLAT-NEXT: zip1 z0.d, z2.d, z1.d -; CONST_SPLAT-NEXT: zip2 z1.d, z2.d, z1.d -; CONST_SPLAT-NEXT: ret +; CHECK-LABEL: complex_mul_const: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v4.2d, #0000000000000000 +; CHECK-NEXT: movi v5.2d, #0000000000000000 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmov 
z6.d, #3.00000000 +; CHECK-NEXT: fmov z7.d, #11.00000000 +; CHECK-NEXT: fcmla z4.d, p0/m, z0.d, z2.d, #0 +; CHECK-NEXT: fcmla z5.d, p0/m, z1.d, z3.d, #0 +; CHECK-NEXT: fcmla z4.d, p0/m, z0.d, z2.d, #90 +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: fcmla z5.d, p0/m, z1.d, z3.d, #90 +; CHECK-NEXT: zip2 z1.d, z7.d, z6.d +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: zip1 z3.d, z7.d, z6.d +; CHECK-NEXT: fcmla z2.d, p0/m, z5.d, z1.d, #0 +; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z3.d, #0 +; CHECK-NEXT: fcmla z2.d, p0/m, z5.d, z1.d, #90 +; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z3.d, #90 +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: ret entry: %strided.vec = tail call { , } @llvm.vector.deinterleave2.nxv4f64( %a) %0 = extractvalue { , } %strided.vec, 0