Skip to content

Commit 66e79e2

Browse files
committed
[AArch64][GlobalISel] Adopt some Ld1Lane* patterns for GlobalISel to reduce codegen regressions
Additionally, ignore G_CONSTANTs between adjacent instructions in isObviouslySafeToFold(). Without that, the new patterns don't have an impact.
1 parent 21e1b13 commit 66e79e2

File tree

7 files changed

+140
-170
lines changed

7 files changed

+140
-170
lines changed

llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "llvm/CodeGen/MachineInstr.h"
1717
#include "llvm/CodeGen/MachineOperand.h"
1818
#include "llvm/CodeGen/MachineRegisterInfo.h"
19+
#include "llvm/CodeGen/TargetOpcodes.h"
1920

2021
#define DEBUG_TYPE "gi-match-table-executor"
2122

@@ -62,9 +63,16 @@ bool GIMatchTableExecutor::isBaseWithConstantOffset(
6263
bool GIMatchTableExecutor::isObviouslySafeToFold(MachineInstr &MI,
6364
MachineInstr &IntoMI) const {
6465
// Immediate neighbours are already folded.
65-
if (MI.getParent() == IntoMI.getParent() &&
66-
std::next(MI.getIterator()) == IntoMI.getIterator())
67-
return true;
66+
// Any G_CONSTANT between immediate neighbours can be ignored.
67+
if (MI.getParent() == IntoMI.getParent()) {
68+
auto IntoIt = IntoMI.getIterator();
69+
auto NextIt = std::next(MI.getIterator());
70+
while (!NextIt.isEnd() && NextIt != IntoIt &&
71+
NextIt->getOpcode() == TargetOpcode::G_CONSTANT)
72+
++NextIt;
73+
if (NextIt == IntoIt)
74+
return true;
75+
}
6876

6977
// Convergent instructions cannot be moved in the CFG.
7078
if (MI.isConvergent() && MI.getParent() != IntoMI.getParent())

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,9 @@ def UImmS8XForm : SDNodeXForm<imm, [{
484484
return CurDAG->getTargetConstant(N->getZExtValue() / 8, SDLoc(N), MVT::i64);
485485
}]>;
486486

487+
// GlobalISel counterpart of the UImmS1XForm SDNodeXForm: declared equivalent
// to it via GISDNodeXFormEquiv and rendered through the custom operand
// renderer named "renderUImmS1".
def gi_UImmS1XForm : GICustomOperandRenderer<"renderUImmS1">,
488+
GISDNodeXFormEquiv<UImmS1XForm>;
489+
487490
// uimm5sN predicate - True if the immediate is a multiple of N in the range
488491
// [0 * N, 32 * N].
489492
def UImm5s2Operand : UImmScaledMemoryIndexed<5, 2>;

llvm/lib/Target/AArch64/AArch64InstrGISel.td

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ def : GINodeEquiv<G_UMULL, AArch64umull>;
271271
def : GINodeEquiv<G_SMULL, AArch64smull>;
272272

273273
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
274+
def : GINodeEquiv<G_INSERT_VECTOR_ELT, insertelt>;
274275

275276
def : GINodeEquiv<G_PREFETCH, AArch64Prefetch>;
276277

@@ -528,3 +529,38 @@ def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
528529
(LD1Rv2d GPR64sp:$Rn)>;
529530
def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
530531
(LD1Rv1d GPR64sp:$Rn)>;
532+
533+
// Pattern for inserting a loaded scalar into a lane of a 64-bit vector.
//   scalar_load - load operator producing the scalar (STy) from GPR64sp:$Rn.
//   VecIndex    - operand class matched for the lane index ($idx).
//   VTy / STy   - vector type of $Rd and scalar type of the loaded element.
//   LD1         - lane-load instruction to emit.
// The source insertelt of the loaded value into VecListOne64:$Rd is replaced
// by: placing $Rd into a wider register via SUBREG_TO_REG on dsub, applying
// LD1 with the UImmS1XForm-transformed index and the address $Rn, and then
// taking the dsub subregister of the result with EXTRACT_SUBREG.
class Ld1Lane64PatGISel<SDPatternOperator scalar_load, Operand VecIndex,
534+
ValueType VTy, ValueType STy, Instruction LD1>
535+
: Pat<(insertelt (VTy VecListOne64:$Rd),
536+
(STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
537+
(EXTRACT_SUBREG
538+
(LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
539+
(UImmS1XForm VecIndex:$idx), GPR64sp:$Rn),
540+
dsub)>;
541+
542+
// Pattern for inserting a loaded scalar into a lane of a 128-bit vector.
//   VecIndex  - operand class matched for the lane index ($idx).
//   VTy / STy - vector type of $Rd and scalar type of the loaded element.
//   LD1       - lane-load instruction to emit.
// An insertelt of (load GPR64sp:$Rn) into VecListOne128:$Rd maps directly to
// LD1 on $Rd with the UImmS1XForm-transformed index and the address $Rn; no
// subregister widening is needed, unlike the 64-bit variant.
class Ld1Lane128PatGISel<Operand VecIndex, ValueType VTy,
543+
ValueType STy, Instruction LD1>
544+
: Pat<(insertelt (VTy VecListOne128:$Rd),
545+
(STy (load GPR64sp:$Rn)), VecIndex:$idx),
546+
(LD1 VecListOne128:$Rd, (UImmS1XForm VecIndex:$idx), GPR64sp:$Rn)>;
547+
548+
// Enable these patterns only for GlobalISel, since
549+
// SelectionDAG analogues only select insertelt with i32 indices.
550+
let Predicates = [OnlyGISel] in {
// Each vector type is instantiated twice, once with the plain VectorIndex*
// operand and once with its 32b variant.
// The index operand class is chosen by element size (B/H/S/D for
// i8/i16/i32/i64) so that it agrees with the LD1i* instruction being
// selected. The 128-bit v4i32 and v2i64 entries previously used the
// VectorIndexH classes, which did not match their element width; they now
// use VectorIndexS and VectorIndexD like the corresponding 64-bit entries.
551+
def : Ld1Lane64PatGISel<load, VectorIndexB, v8i8, i8, LD1i8>;
552+
def : Ld1Lane64PatGISel<load, VectorIndexB32b, v8i8, i8, LD1i8>;
553+
def : Ld1Lane64PatGISel<load, VectorIndexH, v4i16, i16, LD1i16>;
554+
def : Ld1Lane64PatGISel<load, VectorIndexH32b, v4i16, i16, LD1i16>;
555+
def : Ld1Lane64PatGISel<load, VectorIndexS, v2i32, i32, LD1i32>;
556+
def : Ld1Lane64PatGISel<load, VectorIndexS32b, v2i32, i32, LD1i32>;
557+
558+
def : Ld1Lane128PatGISel<VectorIndexB, v16i8, i8, LD1i8>;
559+
def : Ld1Lane128PatGISel<VectorIndexB32b, v16i8, i8, LD1i8>;
560+
def : Ld1Lane128PatGISel<VectorIndexH, v8i16, i16, LD1i16>;
561+
def : Ld1Lane128PatGISel<VectorIndexH32b, v8i16, i16, LD1i16>;
562+
def : Ld1Lane128PatGISel<VectorIndexS, v4i32, i32, LD1i32>;
563+
def : Ld1Lane128PatGISel<VectorIndexS32b, v4i32, i32, LD1i32>;
564+
def : Ld1Lane128PatGISel<VectorIndexD, v2i64, i64, LD1i64>;
565+
def : Ld1Lane128PatGISel<VectorIndexD32b, v2i64, i64, LD1i64>;
566+
}

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -872,6 +872,8 @@ let RecomputePerFunction = 1 in {
872872

873873
def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
874874
def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
875+
876+
// Predicate that holds when the MachineFunction has either the FailedISel or
// the Legalized property set.
// NOTE(review): presumably these properties are only set once the GlobalISel
// pipeline has run on the function, which is what makes this a "GlobalISel
// only" check - confirm.
def OnlyGISel : Predicate<"MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
875877
// Toggles patterns which aren't beneficial in GlobalISel when we aren't
876878
// optimizing. This allows us to selectively use patterns without impacting
877879
// SelectionDAG's behaviour.

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,9 @@ class AArch64InstructionSelector : public InstructionSelector {
479479
const MachineInstr &MI,
480480
int OpIdx = -1) const;
481481

482+
void renderUImmS1(MachineInstrBuilder &MIB, const MachineInstr &MI,
483+
int OpIdx = -1) const;
484+
482485
// Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
483486
void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
484487

@@ -7593,6 +7596,14 @@ void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
75937596
.getZExtValue()));
75947597
}
75957598

7599+
/// Custom operand renderer: appends the value of a G_CONSTANT as an immediate
/// operand on the instruction being built.
///
/// \param MIB   builder for the instruction receiving the immediate.
/// \param MI    the matched instruction; must be a G_CONSTANT (asserted).
/// \param OpIdx must be left at its default of -1 (asserted).
void AArch64InstructionSelector::renderUImmS1(MachineInstrBuilder &MIB,
7600+
const MachineInstr &MI,
7601+
int OpIdx) const {
7602+
assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7603+
"Expected G_CONSTANT");
7604+
// Operand 1 holds the constant; it is emitted zero-extended and unscaled.
MIB.addImm(MI.getOperand(1).getCImm()->getZExtValue());
7605+
}
7606+
75967607
bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
75977608
const MachineInstr &MI, unsigned NumBytes) const {
75987609
if (!MI.mayLoadOrStore())

llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll

Lines changed: 29 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -14283,10 +14283,9 @@ define <16 x i8> @test_v16i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <16 x i8> %A)
1428314283
;
1428414284
; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld1lane:
1428514285
; CHECK-GISEL: ; %bb.0:
14286-
; CHECK-GISEL-NEXT: ldr b1, [x0]
14286+
; CHECK-GISEL-NEXT: ld1.b { v0 }[1], [x0]
1428714287
; CHECK-GISEL-NEXT: add x8, x0, #1
1428814288
; CHECK-GISEL-NEXT: str x8, [x1]
14289-
; CHECK-GISEL-NEXT: mov.b v0[1], v1[0]
1429014289
; CHECK-GISEL-NEXT: ret
1429114290
%tmp1 = load i8, ptr %bar
1429214291
%tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
@@ -14304,10 +14303,9 @@ define <16 x i8> @test_v16i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <16
1430414303
;
1430514304
; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld1lane:
1430614305
; CHECK-GISEL: ; %bb.0:
14307-
; CHECK-GISEL-NEXT: ldr b1, [x0]
14306+
; CHECK-GISEL-NEXT: ld1.b { v0 }[1], [x0]
1430814307
; CHECK-GISEL-NEXT: add x8, x0, x2
1430914308
; CHECK-GISEL-NEXT: str x8, [x1]
14310-
; CHECK-GISEL-NEXT: mov.b v0[1], v1[0]
1431114309
; CHECK-GISEL-NEXT: ret
1431214310
%tmp1 = load i8, ptr %bar
1431314311
%tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
@@ -14327,11 +14325,10 @@ define <8 x i8> @test_v8i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i8> %A) {
1432714325
;
1432814326
; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld1lane:
1432914327
; CHECK-GISEL: ; %bb.0:
14330-
; CHECK-GISEL-NEXT: ldr b1, [x0]
1433114328
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0
1433214329
; CHECK-GISEL-NEXT: add x8, x0, #1
14330+
; CHECK-GISEL-NEXT: ld1.b { v0 }[1], [x0]
1433314331
; CHECK-GISEL-NEXT: str x8, [x1]
14334-
; CHECK-GISEL-NEXT: mov.b v0[1], v1[0]
1433514332
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0
1433614333
; CHECK-GISEL-NEXT: ret
1433714334
%tmp1 = load i8, ptr %bar
@@ -14352,11 +14349,10 @@ define <8 x i8> @test_v8i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x i
1435214349
;
1435314350
; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld1lane:
1435414351
; CHECK-GISEL: ; %bb.0:
14355-
; CHECK-GISEL-NEXT: ldr b1, [x0]
1435614352
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0
1435714353
; CHECK-GISEL-NEXT: add x8, x0, x2
14354+
; CHECK-GISEL-NEXT: ld1.b { v0 }[1], [x0]
1435814355
; CHECK-GISEL-NEXT: str x8, [x1]
14359-
; CHECK-GISEL-NEXT: mov.b v0[1], v1[0]
1436014356
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0
1436114357
; CHECK-GISEL-NEXT: ret
1436214358
%tmp1 = load i8, ptr %bar
@@ -14375,10 +14371,9 @@ define <8 x i16> @test_v8i16_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i16> %A)
1437514371
;
1437614372
; CHECK-GISEL-LABEL: test_v8i16_post_imm_ld1lane:
1437714373
; CHECK-GISEL: ; %bb.0:
14378-
; CHECK-GISEL-NEXT: ldr h1, [x0]
14374+
; CHECK-GISEL-NEXT: ld1.h { v0 }[1], [x0]
1437914375
; CHECK-GISEL-NEXT: add x8, x0, #2
1438014376
; CHECK-GISEL-NEXT: str x8, [x1]
14381-
; CHECK-GISEL-NEXT: mov.h v0[1], v1[0]
1438214377
; CHECK-GISEL-NEXT: ret
1438314378
%tmp1 = load i16, ptr %bar
1438414379
%tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1
@@ -14397,9 +14392,8 @@ define <8 x i16> @test_v8i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x
1439714392
;
1439814393
; CHECK-GISEL-LABEL: test_v8i16_post_reg_ld1lane:
1439914394
; CHECK-GISEL: ; %bb.0:
14400-
; CHECK-GISEL-NEXT: ldr h1, [x0]
14395+
; CHECK-GISEL-NEXT: ld1.h { v0 }[1], [x0]
1440114396
; CHECK-GISEL-NEXT: add x8, x0, x2, lsl #1
14402-
; CHECK-GISEL-NEXT: mov.h v0[1], v1[0]
1440314397
; CHECK-GISEL-NEXT: str x8, [x1]
1440414398
; CHECK-GISEL-NEXT: ret
1440514399
%tmp1 = load i16, ptr %bar
@@ -14420,11 +14414,10 @@ define <4 x i16> @test_v4i16_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x i16> %A)
1442014414
;
1442114415
; CHECK-GISEL-LABEL: test_v4i16_post_imm_ld1lane:
1442214416
; CHECK-GISEL: ; %bb.0:
14423-
; CHECK-GISEL-NEXT: ldr h1, [x0]
1442414417
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0
1442514418
; CHECK-GISEL-NEXT: add x8, x0, #2
14419+
; CHECK-GISEL-NEXT: ld1.h { v0 }[1], [x0]
1442614420
; CHECK-GISEL-NEXT: str x8, [x1]
14427-
; CHECK-GISEL-NEXT: mov.h v0[1], v1[0]
1442814421
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0
1442914422
; CHECK-GISEL-NEXT: ret
1443014423
%tmp1 = load i16, ptr %bar
@@ -14446,12 +14439,11 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x
1444614439
;
1444714440
; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld1lane:
1444814441
; CHECK-GISEL: ; %bb.0:
14449-
; CHECK-GISEL-NEXT: ldr h1, [x0]
1445014442
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0
1445114443
; CHECK-GISEL-NEXT: add x8, x0, x2, lsl #1
14452-
; CHECK-GISEL-NEXT: mov.h v0[1], v1[0]
14453-
; CHECK-GISEL-NEXT: str x8, [x1]
14444+
; CHECK-GISEL-NEXT: ld1.h { v0 }[1], [x0]
1445414445
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0
14446+
; CHECK-GISEL-NEXT: str x8, [x1]
1445514447
; CHECK-GISEL-NEXT: ret
1445614448
%tmp1 = load i16, ptr %bar
1445714449
%tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
@@ -14469,10 +14461,9 @@ define <4 x i32> @test_v4i32_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x i32> %A)
1446914461
;
1447014462
; CHECK-GISEL-LABEL: test_v4i32_post_imm_ld1lane:
1447114463
; CHECK-GISEL: ; %bb.0:
14472-
; CHECK-GISEL-NEXT: ldr s1, [x0]
14464+
; CHECK-GISEL-NEXT: ld1.s { v0 }[1], [x0]
1447314465
; CHECK-GISEL-NEXT: add x8, x0, #4
1447414466
; CHECK-GISEL-NEXT: str x8, [x1]
14475-
; CHECK-GISEL-NEXT: mov.s v0[1], v1[0]
1447614467
; CHECK-GISEL-NEXT: ret
1447714468
%tmp1 = load i32, ptr %bar
1447814469
%tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1
@@ -14491,9 +14482,8 @@ define <4 x i32> @test_v4i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x
1449114482
;
1449214483
; CHECK-GISEL-LABEL: test_v4i32_post_reg_ld1lane:
1449314484
; CHECK-GISEL: ; %bb.0:
14494-
; CHECK-GISEL-NEXT: ldr s1, [x0]
14485+
; CHECK-GISEL-NEXT: ld1.s { v0 }[1], [x0]
1449514486
; CHECK-GISEL-NEXT: add x8, x0, x2, lsl #2
14496-
; CHECK-GISEL-NEXT: mov.s v0[1], v1[0]
1449714487
; CHECK-GISEL-NEXT: str x8, [x1]
1449814488
; CHECK-GISEL-NEXT: ret
1449914489
%tmp1 = load i32, ptr %bar
@@ -14514,11 +14504,10 @@ define <2 x i32> @test_v2i32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x i32> %A)
1451414504
;
1451514505
; CHECK-GISEL-LABEL: test_v2i32_post_imm_ld1lane:
1451614506
; CHECK-GISEL: ; %bb.0:
14517-
; CHECK-GISEL-NEXT: ldr s1, [x0]
1451814507
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0
1451914508
; CHECK-GISEL-NEXT: add x8, x0, #4
14509+
; CHECK-GISEL-NEXT: ld1.s { v0 }[1], [x0]
1452014510
; CHECK-GISEL-NEXT: str x8, [x1]
14521-
; CHECK-GISEL-NEXT: mov.s v0[1], v1[0]
1452214511
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0
1452314512
; CHECK-GISEL-NEXT: ret
1452414513
%tmp1 = load i32, ptr %bar
@@ -14540,12 +14529,11 @@ define <2 x i32> @test_v2i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x
1454014529
;
1454114530
; CHECK-GISEL-LABEL: test_v2i32_post_reg_ld1lane:
1454214531
; CHECK-GISEL: ; %bb.0:
14543-
; CHECK-GISEL-NEXT: ldr s1, [x0]
1454414532
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0
1454514533
; CHECK-GISEL-NEXT: add x8, x0, x2, lsl #2
14546-
; CHECK-GISEL-NEXT: mov.s v0[1], v1[0]
14547-
; CHECK-GISEL-NEXT: str x8, [x1]
14534+
; CHECK-GISEL-NEXT: ld1.s { v0 }[1], [x0]
1454814535
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0
14536+
; CHECK-GISEL-NEXT: str x8, [x1]
1454914537
; CHECK-GISEL-NEXT: ret
1455014538
%tmp1 = load i32, ptr %bar
1455114539
%tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
@@ -14563,10 +14551,9 @@ define <2 x i64> @test_v2i64_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x i64> %A)
1456314551
;
1456414552
; CHECK-GISEL-LABEL: test_v2i64_post_imm_ld1lane:
1456514553
; CHECK-GISEL: ; %bb.0:
14566-
; CHECK-GISEL-NEXT: ldr d1, [x0]
14554+
; CHECK-GISEL-NEXT: ld1.d { v0 }[1], [x0]
1456714555
; CHECK-GISEL-NEXT: add x8, x0, #8
1456814556
; CHECK-GISEL-NEXT: str x8, [x1]
14569-
; CHECK-GISEL-NEXT: mov.d v0[1], v1[0]
1457014557
; CHECK-GISEL-NEXT: ret
1457114558
%tmp1 = load i64, ptr %bar
1457214559
%tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1
@@ -14585,9 +14572,8 @@ define <2 x i64> @test_v2i64_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x
1458514572
;
1458614573
; CHECK-GISEL-LABEL: test_v2i64_post_reg_ld1lane:
1458714574
; CHECK-GISEL: ; %bb.0:
14588-
; CHECK-GISEL-NEXT: ldr d1, [x0]
14575+
; CHECK-GISEL-NEXT: ld1.d { v0 }[1], [x0]
1458914576
; CHECK-GISEL-NEXT: add x8, x0, x2, lsl #3
14590-
; CHECK-GISEL-NEXT: mov.d v0[1], v1[0]
1459114577
; CHECK-GISEL-NEXT: str x8, [x1]
1459214578
; CHECK-GISEL-NEXT: ret
1459314579
%tmp1 = load i64, ptr %bar
@@ -14606,10 +14592,9 @@ define <4 x float> @test_v4f32_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x float>
1460614592
;
1460714593
; CHECK-GISEL-LABEL: test_v4f32_post_imm_ld1lane:
1460814594
; CHECK-GISEL: ; %bb.0:
14609-
; CHECK-GISEL-NEXT: ldr s1, [x0]
14595+
; CHECK-GISEL-NEXT: ld1.s { v0 }[1], [x0]
1461014596
; CHECK-GISEL-NEXT: add x8, x0, #4
1461114597
; CHECK-GISEL-NEXT: str x8, [x1]
14612-
; CHECK-GISEL-NEXT: mov.s v0[1], v1[0]
1461314598
; CHECK-GISEL-NEXT: ret
1461414599
%tmp1 = load float, ptr %bar
1461514600
%tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
@@ -14628,9 +14613,8 @@ define <4 x float> @test_v4f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4
1462814613
;
1462914614
; CHECK-GISEL-LABEL: test_v4f32_post_reg_ld1lane:
1463014615
; CHECK-GISEL: ; %bb.0:
14631-
; CHECK-GISEL-NEXT: ldr s1, [x0]
14616+
; CHECK-GISEL-NEXT: ld1.s { v0 }[1], [x0]
1463214617
; CHECK-GISEL-NEXT: add x8, x0, x2, lsl #2
14633-
; CHECK-GISEL-NEXT: mov.s v0[1], v1[0]
1463414618
; CHECK-GISEL-NEXT: str x8, [x1]
1463514619
; CHECK-GISEL-NEXT: ret
1463614620
%tmp1 = load float, ptr %bar
@@ -14651,11 +14635,10 @@ define <2 x float> @test_v2f32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x float>
1465114635
;
1465214636
; CHECK-GISEL-LABEL: test_v2f32_post_imm_ld1lane:
1465314637
; CHECK-GISEL: ; %bb.0:
14654-
; CHECK-GISEL-NEXT: ldr s1, [x0]
1465514638
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0
1465614639
; CHECK-GISEL-NEXT: add x8, x0, #4
14640+
; CHECK-GISEL-NEXT: ld1.s { v0 }[1], [x0]
1465714641
; CHECK-GISEL-NEXT: str x8, [x1]
14658-
; CHECK-GISEL-NEXT: mov.s v0[1], v1[0]
1465914642
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0
1466014643
; CHECK-GISEL-NEXT: ret
1466114644
%tmp1 = load float, ptr %bar
@@ -14677,12 +14660,11 @@ define <2 x float> @test_v2f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2
1467714660
;
1467814661
; CHECK-GISEL-LABEL: test_v2f32_post_reg_ld1lane:
1467914662
; CHECK-GISEL: ; %bb.0:
14680-
; CHECK-GISEL-NEXT: ldr s1, [x0]
1468114663
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0
1468214664
; CHECK-GISEL-NEXT: add x8, x0, x2, lsl #2
14683-
; CHECK-GISEL-NEXT: mov.s v0[1], v1[0]
14684-
; CHECK-GISEL-NEXT: str x8, [x1]
14665+
; CHECK-GISEL-NEXT: ld1.s { v0 }[1], [x0]
1468514666
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0
14667+
; CHECK-GISEL-NEXT: str x8, [x1]
1468614668
; CHECK-GISEL-NEXT: ret
1468714669
%tmp1 = load float, ptr %bar
1468814670
%tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
@@ -14700,10 +14682,9 @@ define <2 x double> @test_v2f64_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x double
1470014682
;
1470114683
; CHECK-GISEL-LABEL: test_v2f64_post_imm_ld1lane:
1470214684
; CHECK-GISEL: ; %bb.0:
14703-
; CHECK-GISEL-NEXT: ldr d1, [x0]
14685+
; CHECK-GISEL-NEXT: ld1.d { v0 }[1], [x0]
1470414686
; CHECK-GISEL-NEXT: add x8, x0, #8
1470514687
; CHECK-GISEL-NEXT: str x8, [x1]
14706-
; CHECK-GISEL-NEXT: mov.d v0[1], v1[0]
1470714688
; CHECK-GISEL-NEXT: ret
1470814689
%tmp1 = load double, ptr %bar
1470914690
%tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1
@@ -14722,9 +14703,8 @@ define <2 x double> @test_v2f64_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <
1472214703
;
1472314704
; CHECK-GISEL-LABEL: test_v2f64_post_reg_ld1lane:
1472414705
; CHECK-GISEL: ; %bb.0:
14725-
; CHECK-GISEL-NEXT: ldr d1, [x0]
14706+
; CHECK-GISEL-NEXT: ld1.d { v0 }[1], [x0]
1472614707
; CHECK-GISEL-NEXT: add x8, x0, x2, lsl #3
14727-
; CHECK-GISEL-NEXT: mov.d v0[1], v1[0]
1472814708
; CHECK-GISEL-NEXT: str x8, [x1]
1472914709
; CHECK-GISEL-NEXT: ret
1473014710
%tmp1 = load double, ptr %bar
@@ -14779,15 +14759,14 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(ptr %bar, ptr %ptr,
1477914759
; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow:
1478014760
; CHECK-GISEL: ; %bb.0:
1478114761
; CHECK-GISEL-NEXT: add x8, x0, x2, lsl #1
14782-
; CHECK-GISEL-NEXT: ldr h1, [x0]
1478314762
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0
14763+
; CHECK-GISEL-NEXT: ld1.h { v0 }[1], [x0]
1478414764
; CHECK-GISEL-NEXT: str x8, [x1]
14785-
; CHECK-GISEL-NEXT: mov.h v0[1], v1[0]
14786-
; CHECK-GISEL-NEXT: ldr d2, [x3]
1478714765
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0
14788-
; CHECK-GISEL-NEXT: cnt.8b v2, v2
14789-
; CHECK-GISEL-NEXT: uaddlp.4h v2, v2
14790-
; CHECK-GISEL-NEXT: uaddlp.2s v1, v2
14766+
; CHECK-GISEL-NEXT: ldr d1, [x3]
14767+
; CHECK-GISEL-NEXT: cnt.8b v1, v1
14768+
; CHECK-GISEL-NEXT: uaddlp.4h v1, v1
14769+
; CHECK-GISEL-NEXT: uaddlp.2s v1, v1
1479114770
; CHECK-GISEL-NEXT: str d1, [x3]
1479214771
; CHECK-GISEL-NEXT: ret
1479314772
%tmp1 = load i16, ptr %bar
@@ -14987,9 +14966,8 @@ define <4 x i32> @test_inc_cycle(<4 x i32> %vec, ptr %in) {
1498714966
;
1498814967
; CHECK-GISEL-LABEL: test_inc_cycle:
1498914968
; CHECK-GISEL: ; %bb.0:
14990-
; CHECK-GISEL-NEXT: ldr s1, [x0]
14969+
; CHECK-GISEL-NEXT: ld1.s { v0 }[0], [x0]
1499114970
; CHECK-GISEL-NEXT: adrp x9, _var@PAGE
14992-
; CHECK-GISEL-NEXT: mov.s v0[0], v1[0]
1499314971
; CHECK-GISEL-NEXT: fmov x8, d0
1499414972
; CHECK-GISEL-NEXT: add x8, x0, x8, lsl #2
1499514973
; CHECK-GISEL-NEXT: str x8, [x9, _var@PAGEOFF]

0 commit comments

Comments
 (0)