Skip to content

Commit ee4e917

Browse files
paulwalker-arm authored and krishna2803 committed
[LLVM][CodeGen][SME] Only emit strided loads in streaming mode. (llvm#150445)
The selection code for aarch64_sve_ld[nt]1_pn_x{2,4} intrinsics gates the use of strided load instructions behind the SME2 target feature. However, the instructions are only available in streaming mode.
1 parent ddcdf35 commit ee4e917

File tree

3 files changed

+18
-16
lines changed

3 files changed

+18
-16
lines changed

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -5296,7 +5296,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
52965296
}
52975297
case Intrinsic::aarch64_sve_ld1_pn_x2: {
52985298
if (VT == MVT::nxv16i8) {
5299-
if (Subtarget->hasSME2())
5299+
if (Subtarget->hasSME2() && Subtarget->isStreaming())
53005300
SelectContiguousMultiVectorLoad(
53015301
Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
53025302
else if (Subtarget->hasSVE2p1())
@@ -5307,7 +5307,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
53075307
return;
53085308
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
53095309
VT == MVT::nxv8bf16) {
5310-
if (Subtarget->hasSME2())
5310+
if (Subtarget->hasSME2() && Subtarget->isStreaming())
53115311
SelectContiguousMultiVectorLoad(
53125312
Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
53135313
else if (Subtarget->hasSVE2p1())
@@ -5317,7 +5317,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
53175317
break;
53185318
return;
53195319
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5320-
if (Subtarget->hasSME2())
5320+
if (Subtarget->hasSME2() && Subtarget->isStreaming())
53215321
SelectContiguousMultiVectorLoad(
53225322
Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
53235323
else if (Subtarget->hasSVE2p1())
@@ -5327,7 +5327,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
53275327
break;
53285328
return;
53295329
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5330-
if (Subtarget->hasSME2())
5330+
if (Subtarget->hasSME2() && Subtarget->isStreaming())
53315331
SelectContiguousMultiVectorLoad(
53325332
Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
53335333
else if (Subtarget->hasSVE2p1())
@@ -5341,7 +5341,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
53415341
}
53425342
case Intrinsic::aarch64_sve_ld1_pn_x4: {
53435343
if (VT == MVT::nxv16i8) {
5344-
if (Subtarget->hasSME2())
5344+
if (Subtarget->hasSME2() && Subtarget->isStreaming())
53455345
SelectContiguousMultiVectorLoad(
53465346
Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
53475347
else if (Subtarget->hasSVE2p1())
@@ -5352,7 +5352,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
53525352
return;
53535353
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
53545354
VT == MVT::nxv8bf16) {
5355-
if (Subtarget->hasSME2())
5355+
if (Subtarget->hasSME2() && Subtarget->isStreaming())
53565356
SelectContiguousMultiVectorLoad(
53575357
Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
53585358
else if (Subtarget->hasSVE2p1())
@@ -5362,7 +5362,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
53625362
break;
53635363
return;
53645364
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5365-
if (Subtarget->hasSME2())
5365+
if (Subtarget->hasSME2() && Subtarget->isStreaming())
53665366
SelectContiguousMultiVectorLoad(
53675367
Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
53685368
else if (Subtarget->hasSVE2p1())
@@ -5372,7 +5372,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
53725372
break;
53735373
return;
53745374
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5375-
if (Subtarget->hasSME2())
5375+
if (Subtarget->hasSME2() && Subtarget->isStreaming())
53765376
SelectContiguousMultiVectorLoad(
53775377
Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
53785378
else if (Subtarget->hasSVE2p1())
@@ -5386,7 +5386,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
53865386
}
53875387
case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
53885388
if (VT == MVT::nxv16i8) {
5389-
if (Subtarget->hasSME2())
5389+
if (Subtarget->hasSME2() && Subtarget->isStreaming())
53905390
SelectContiguousMultiVectorLoad(Node, 2, 0,
53915391
AArch64::LDNT1B_2Z_IMM_PSEUDO,
53925392
AArch64::LDNT1B_2Z_PSEUDO);
@@ -5398,7 +5398,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
53985398
return;
53995399
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
54005400
VT == MVT::nxv8bf16) {
5401-
if (Subtarget->hasSME2())
5401+
if (Subtarget->hasSME2() && Subtarget->isStreaming())
54025402
SelectContiguousMultiVectorLoad(Node, 2, 1,
54035403
AArch64::LDNT1H_2Z_IMM_PSEUDO,
54045404
AArch64::LDNT1H_2Z_PSEUDO);
@@ -5409,7 +5409,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
54095409
break;
54105410
return;
54115411
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5412-
if (Subtarget->hasSME2())
5412+
if (Subtarget->hasSME2() && Subtarget->isStreaming())
54135413
SelectContiguousMultiVectorLoad(Node, 2, 2,
54145414
AArch64::LDNT1W_2Z_IMM_PSEUDO,
54155415
AArch64::LDNT1W_2Z_PSEUDO);
@@ -5420,7 +5420,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
54205420
break;
54215421
return;
54225422
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5423-
if (Subtarget->hasSME2())
5423+
if (Subtarget->hasSME2() && Subtarget->isStreaming())
54245424
SelectContiguousMultiVectorLoad(Node, 2, 3,
54255425
AArch64::LDNT1D_2Z_IMM_PSEUDO,
54265426
AArch64::LDNT1D_2Z_PSEUDO);
@@ -5435,7 +5435,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
54355435
}
54365436
case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
54375437
if (VT == MVT::nxv16i8) {
5438-
if (Subtarget->hasSME2())
5438+
if (Subtarget->hasSME2() && Subtarget->isStreaming())
54395439
SelectContiguousMultiVectorLoad(Node, 4, 0,
54405440
AArch64::LDNT1B_4Z_IMM_PSEUDO,
54415441
AArch64::LDNT1B_4Z_PSEUDO);
@@ -5447,7 +5447,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
54475447
return;
54485448
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
54495449
VT == MVT::nxv8bf16) {
5450-
if (Subtarget->hasSME2())
5450+
if (Subtarget->hasSME2() && Subtarget->isStreaming())
54515451
SelectContiguousMultiVectorLoad(Node, 4, 1,
54525452
AArch64::LDNT1H_4Z_IMM_PSEUDO,
54535453
AArch64::LDNT1H_4Z_PSEUDO);
@@ -5458,7 +5458,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
54585458
break;
54595459
return;
54605460
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5461-
if (Subtarget->hasSME2())
5461+
if (Subtarget->hasSME2() && Subtarget->isStreaming())
54625462
SelectContiguousMultiVectorLoad(Node, 4, 2,
54635463
AArch64::LDNT1W_4Z_IMM_PSEUDO,
54645464
AArch64::LDNT1W_4Z_PSEUDO);
@@ -5469,7 +5469,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
54695469
break;
54705470
return;
54715471
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5472-
if (Subtarget->hasSME2())
5472+
if (Subtarget->hasSME2() && Subtarget->isStreaming())
54735473
SelectContiguousMultiVectorLoad(Node, 4, 3,
54745474
AArch64::LDNT1D_4Z_IMM_PSEUDO,
54755475
AArch64::LDNT1D_4Z_PSEUDO);

llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -force-streaming -verify-machineinstrs < %s | FileCheck %s --check-prefixes=STRIDED
33
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CONTIGUOUS
4+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+sme2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CONTIGUOUS
45

56
define <vscale x 32 x i8> @ld1_x2_i8_z0_z8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %z1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
67
; CHECK-LABEL: ld1_x2_i8_z0_z8:

llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -force-streaming -verify-machineinstrs < %s | FileCheck %s --check-prefixes=STRIDED
33
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CONTIGUOUS
4+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+sme2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CONTIGUOUS
45

56
define <vscale x 32 x i8> @ldnt1_x2_i8_z0_z8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %z1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
67
; STRIDED-LABEL: ldnt1_x2_i8_z0_z8:

0 commit comments

Comments
 (0)