Skip to content

Commit 2ad1d77

Browse files
[AArch64] Match constants in SelectSMETileSlice (#151494)
If the slice index is a constant that fits the immediate range, select the `WZR + <imm>` addressing mode instead of materializing the constant in a register.
1 parent 4443b37 commit 2ad1d77

File tree

2 files changed

+79
-8
lines changed

2 files changed

+79
-8
lines changed

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7617,16 +7617,29 @@ bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
76177617
bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
76187618
SDValue &Base, SDValue &Offset,
76197619
unsigned Scale) {
7620-
// Try to untangle an ADD node into a 'reg + offset'
7621-
if (CurDAG->isBaseWithConstantOffset(N))
7622-
if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
7620+
auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7621+
if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
76237622
int64_t ImmOff = C->getSExtValue();
7624-
if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) {
7625-
Base = N.getOperand(0);
7626-
Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7627-
return true;
7628-
}
7623+
if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
7624+
return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
76297625
}
7626+
return SDValue();
7627+
};
7628+
7629+
if (SDValue C = MatchConstantOffset(N)) {
7630+
Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
7631+
Offset = C;
7632+
return true;
7633+
}
7634+
7635+
// Try to untangle an ADD node into a 'reg + offset'
7636+
if (CurDAG->isBaseWithConstantOffset(N)) {
7637+
if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
7638+
Base = N.getOperand(0);
7639+
Offset = C;
7640+
return true;
7641+
}
7642+
}
76307643

76317644
// By default, just match reg + 0.
76327645
Base = N;
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mattr=+sme2 -enable-subreg-liveness < %s| FileCheck %s
3+
4+
target triple = "aarch64"
5+
6+
; The tile-slice addressing mode supports an immediate of 0-7.
7+
; This tests a constant slice index of 0, 1 and 7 (folded into the immediate with a zero base) and 8 (not folded; materialized in a register).
8+
define void @sme_tileslice_addrmode_zero_base_plus_constant_offset(i32 %slice, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4) "aarch64_pstate_sm_enabled" {
9+
; CHECK-LABEL: sme_tileslice_addrmode_zero_base_plus_constant_offset:
10+
; CHECK: // %bb.0:
11+
; CHECK-NEXT: mov w8, wzr
12+
; CHECK-NEXT: mov w9, #8 // =0x8
13+
; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
14+
; CHECK-NEXT: fdot za.s[w8, 1, vgx4], { z0.h - z3.h }, z4.h[0]
15+
; CHECK-NEXT: fdot za.s[w8, 7, vgx4], { z0.h - z3.h }, z4.h[0]
16+
; CHECK-NEXT: fdot za.s[w9, 0, vgx4], { z0.h - z3.h }, z4.h[0]
17+
; CHECK-NEXT: ret
18+
tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 0, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
19+
tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 1, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
20+
tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 7, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
21+
tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 8, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
22+
ret void
23+
}
24+
25+
; The tile-slice addressing mode supports an immediate of 0-7.
26+
; This tests a register base plus a constant offset of 0, 1 and 7 (folded into the immediate) and 8 (not folded; computed with a separate add).
27+
define void @sme_tileslice_addrmode_base_plus_constant_offset(i32 %slice, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4) "aarch64_pstate_sm_enabled" {
28+
; CHECK-LABEL: sme_tileslice_addrmode_base_plus_constant_offset:
29+
; CHECK: // %bb.0:
30+
; CHECK-NEXT: mov w8, w0
31+
; CHECK-NEXT: add w9, w0, #8
32+
; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
33+
; CHECK-NEXT: fdot za.s[w8, 1, vgx4], { z0.h - z3.h }, z4.h[0]
34+
; CHECK-NEXT: fdot za.s[w8, 7, vgx4], { z0.h - z3.h }, z4.h[0]
35+
; CHECK-NEXT: fdot za.s[w9, 0, vgx4], { z0.h - z3.h }, z4.h[0]
36+
; CHECK-NEXT: ret
37+
%slice0 = add i32 %slice, 0
38+
tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice0, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
39+
%slice1 = add i32 %slice, 1
40+
tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice1, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
41+
%slice7 = add i32 %slice, 7
42+
tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice7, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
43+
%slice8 = add i32 %slice, 8
44+
tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice8, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
45+
ret void
46+
}
47+
48+
define void @sme_tileslice_addrmode_base_plus_zero_offset(i32 %slice, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4) "aarch64_pstate_sm_enabled" {
49+
; CHECK-LABEL: sme_tileslice_addrmode_base_plus_zero_offset:
50+
; CHECK: // %bb.0:
51+
; CHECK-NEXT: mov w8, w0
52+
; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
53+
; CHECK-NEXT: ret
54+
tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
55+
ret void
56+
}
57+
58+
declare void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, i32 immarg)

0 commit comments

Comments
 (0)