Skip to content

Commit 265694a

Browse files
committed
Also sink vector types with different element counts
1 parent 343168a commit 265694a

File tree

2 files changed

+14
-9
lines changed

2 files changed

+14
-9
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5169,9 +5169,14 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
51695169
}
51705170
case Instruction::Mul: {
51715171
auto ShouldSinkSplatForIndexedVariant = [](Value *V) {
5172-
auto VT = MVT::getVT(V->getType(), /*HandleUnknown=*/true);
5173-
return (VT == MVT::v4i16 || VT == MVT::v8i16 || VT == MVT::v2i32 ||
5174-
VT == MVT::v4i32);
5172+
auto *Ty = cast<VectorType>(V->getType());
5173+
// For SVE, lane indexing only selects within each 128-bit segment, so a splat cannot be folded into the indexed form.
5174+
if (Ty->isScalableTy())
5175+
return false;
5176+
5177+
// Indexed variants of Mul exist for i16 and i32 element types only.
5178+
auto ElemVT = MVT::getVT(Ty->getElementType(), /*HandleUnknown=*/true);
5179+
return (ElemVT == MVT::i16 || ElemVT == MVT::i32);
51755180
};
51765181

51775182
int NumZExts = 0, NumSExts = 0;

llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1291,10 +1291,10 @@ for.end12: ; preds = %vector.body
12911291
define void @matrix_mul_signed_and_double(i32 %N, ptr nocapture %C, ptr nocapture readonly %A, i32 %val) {
12921292
; CHECK-SD-LABEL: matrix_mul_signed_and_double:
12931293
; CHECK-SD: // %bb.0: // %vector.header
1294-
; CHECK-SD-NEXT: and w8, w3, #0xffff
1294+
; CHECK-SD-NEXT: and w9, w3, #0xffff
12951295
; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
1296-
; CHECK-SD-NEXT: dup v0.4s, w8
12971296
; CHECK-SD-NEXT: and x8, x0, #0xfffffff0
1297+
; CHECK-SD-NEXT: fmov s0, w9
12981298
; CHECK-SD-NEXT: .LBB13_1: // %vector.body
12991299
; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1
13001300
; CHECK-SD-NEXT: add x9, x2, w0, uxtw #1
@@ -1307,10 +1307,10 @@ define void @matrix_mul_signed_and_double(i32 %N, ptr nocapture %C, ptr nocaptur
13071307
; CHECK-SD-NEXT: sshll v1.4s, v1.4h, #0
13081308
; CHECK-SD-NEXT: sshll2 v4.4s, v2.8h, #0
13091309
; CHECK-SD-NEXT: sshll v2.4s, v2.4h, #0
1310-
; CHECK-SD-NEXT: mul v3.4s, v0.4s, v3.4s
1311-
; CHECK-SD-NEXT: mul v1.4s, v0.4s, v1.4s
1312-
; CHECK-SD-NEXT: mul v4.4s, v0.4s, v4.4s
1313-
; CHECK-SD-NEXT: mul v2.4s, v0.4s, v2.4s
1310+
; CHECK-SD-NEXT: mul v3.4s, v3.4s, v0.s[0]
1311+
; CHECK-SD-NEXT: mul v1.4s, v1.4s, v0.s[0]
1312+
; CHECK-SD-NEXT: mul v4.4s, v4.4s, v0.s[0]
1313+
; CHECK-SD-NEXT: mul v2.4s, v2.4s, v0.s[0]
13141314
; CHECK-SD-NEXT: stp q1, q3, [x9]
13151315
; CHECK-SD-NEXT: stp q2, q4, [x9, #32]
13161316
; CHECK-SD-NEXT: b.ne .LBB13_1

0 commit comments

Comments
 (0)