Skip to content

Commit 6b4e1f7

Browse files
[AArch64] Add ISel for postindex ld1/st1 in big-endian
When big-endian, we need to use ld1/st1 for vector loads and stores so that the elements end up in the correct order, but this has prevented post-index addressing from being used for them. Fix this by adding the appropriate ISel patterns, plus the relevant changes in ISelLowering and ISelDAGToDAG, so that post-index addressing is used when the offset equals the access size.
1 parent 58d2347 commit 6b4e1f7

File tree

5 files changed

+293
-55
lines changed

5 files changed

+293
-55
lines changed

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1583,6 +1583,8 @@ bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
15831583
EVT DstVT = N->getValueType(0);
15841584
ISD::MemIndexedMode AM = LD->getAddressingMode();
15851585
bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1586+
ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1587+
int OffsetVal = (int)OffsetOp->getZExtValue();
15861588

15871589
// We're not doing validity checking here. That was done when checking
15881590
// if we should mark the load as indexed or not. We're just selecting
@@ -1637,18 +1639,58 @@ bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
16371639
Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
16381640
} else if (VT == MVT::f32) {
16391641
Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1640-
} else if (VT == MVT::f64 || VT.is64BitVector()) {
1642+
} else if (VT == MVT::f64 ||
1643+
(VT.is64BitVector() && Subtarget->isLittleEndian())) {
16411644
Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1642-
} else if (VT.is128BitVector()) {
1645+
} else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
16431646
Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1647+
} else if (VT.is64BitVector()) {
1648+
if (IsPre || OffsetVal != 8)
1649+
return false;
1650+
switch (VT.getScalarSizeInBits()) {
1651+
case 8:
1652+
Opcode = AArch64::LD1Onev8b_POST;
1653+
break;
1654+
case 16:
1655+
Opcode = AArch64::LD1Onev4h_POST;
1656+
break;
1657+
case 32:
1658+
Opcode = AArch64::LD1Onev2s_POST;
1659+
break;
1660+
case 64:
1661+
Opcode = AArch64::LD1Onev1d_POST;
1662+
break;
1663+
default:
1664+
llvm_unreachable("Expected vector element to be a power of 2");
1665+
}
1666+
} else if (VT.is128BitVector()) {
1667+
if (IsPre || OffsetVal != 16)
1668+
return false;
1669+
switch (VT.getScalarSizeInBits()) {
1670+
case 8:
1671+
Opcode = AArch64::LD1Onev16b_POST;
1672+
break;
1673+
case 16:
1674+
Opcode = AArch64::LD1Onev8h_POST;
1675+
break;
1676+
case 32:
1677+
Opcode = AArch64::LD1Onev4s_POST;
1678+
break;
1679+
case 64:
1680+
Opcode = AArch64::LD1Onev2d_POST;
1681+
break;
1682+
default:
1683+
llvm_unreachable("Expected vector element to be a power of 2");
1684+
}
16441685
} else
16451686
return false;
16461687
SDValue Chain = LD->getChain();
16471688
SDValue Base = LD->getBasePtr();
1648-
ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1649-
int OffsetVal = (int)OffsetOp->getZExtValue();
16501689
SDLoc dl(N);
1651-
SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1690+
// LD1 encodes an immediate offset by using XZR as the offset register.
1691+
SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
1692+
? CurDAG->getRegister(AArch64::XZR, MVT::i64)
1693+
: CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
16521694
SDValue Ops[] = { Base, Offset, Chain };
16531695
SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
16541696
MVT::Other, Ops);

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2108,12 +2108,18 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
21082108
setOperationAction(ISD::STRICT_FSETCC, VT, Expand);
21092109
setOperationAction(ISD::STRICT_FSETCCS, VT, Expand);
21102110

2111+
// When little-endian we can use ordinary d and q register loads/stores for
2112+
// vector types, but when big-endian we need to use structure load/store
2113+
// instructions, which only allow post-index addressing.
21112114
if (Subtarget->isLittleEndian()) {
21122115
for (unsigned im = (unsigned)ISD::PRE_INC;
21132116
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
21142117
setIndexedLoadAction(im, VT, Legal);
21152118
setIndexedStoreAction(im, VT, Legal);
21162119
}
2120+
} else {
2121+
setIndexedLoadAction(ISD::POST_INC, VT, Legal);
2122+
setIndexedStoreAction(ISD::POST_INC, VT, Legal);
21172123
}
21182124

21192125
if (Subtarget->hasD128()) {
@@ -27067,6 +27073,12 @@ bool AArch64TargetLowering::getIndexedAddressParts(SDNode *N, SDNode *Op,
2706727073
RHSC = -(uint64_t)RHSC;
2706827074
if (!isInt<9>(RHSC))
2706927075
return false;
27076+
// When big-endian, LD1/ST1 are used for vector loads and stores, and these
27077+
// only allow an offset that's equal to the store size.
27078+
EVT MemType = cast<MemSDNode>(N)->getMemoryVT();
27079+
if (!Subtarget->isLittleEndian() && MemType.isVector() &&
27080+
RHSC != MemType.getStoreSize())
27081+
return false;
2707027082
// Always emit pre-inc/post-inc addressing mode. Use negated constant offset
2707127083
// when dealing with subtraction.
2707227084
Offset = DAG.getConstant(RHSC, SDLoc(N), RHS->getValueType(0));

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 51 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -4942,39 +4942,42 @@ def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
49424942
def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off),
49434943
(STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>;
49444944

4945-
def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4946-
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4947-
def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4948-
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4949-
def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4950-
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4951-
def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4952-
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4953-
def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4954-
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4955-
def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4956-
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4957-
def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4958-
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4959-
def : Pat<(post_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4960-
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4961-
4962-
def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4963-
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4964-
def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4965-
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4966-
def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4967-
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4968-
def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4969-
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4970-
def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4971-
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4972-
def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4973-
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4974-
def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4975-
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4976-
def : Pat<(post_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4977-
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4945+
let Predicates = [IsLE] in {
4946+
// These STRDpost/STRQpost patterns only give the correct element order in
// little-endian; in big-endian we must use ST1 to store vectors instead.
4947+
def : Pat<(post_store(v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4948+
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4949+
def : Pat<(post_store(v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4950+
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4951+
def : Pat<(post_store(v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4952+
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4953+
def : Pat<(post_store(v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4954+
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4955+
def : Pat<(post_store(v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4956+
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4957+
def : Pat<(post_store(v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4958+
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4959+
def : Pat<(post_store(v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4960+
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4961+
def : Pat<(post_store(v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4962+
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4963+
4964+
def : Pat<(post_store(v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4965+
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4966+
def : Pat<(post_store(v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4967+
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4968+
def : Pat<(post_store(v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4969+
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4970+
def : Pat<(post_store(v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4971+
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4972+
def : Pat<(post_store(v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4973+
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4974+
def : Pat<(post_store(v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4975+
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4976+
def : Pat<(post_store(v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4977+
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4978+
def : Pat<(post_store(v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4979+
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4980+
}
49784981

49794982
//===----------------------------------------------------------------------===//
49804983
// Load/store exclusive instructions.
@@ -8925,6 +8928,21 @@ def : St1Pat<v4i16, ST1Onev4h>;
89258928
def : St1Pat<v2i32, ST1Onev2s>;
89268929
def : St1Pat<v1i64, ST1Onev1d>;
89278930

8931+
class St1PostPat<ValueType ty, Instruction INST, int off>
8932+
: Pat<(post_store ty:$Vt, GPR64sp:$Rn, (i64 off)),
8933+
(INST ty:$Vt, GPR64sp:$Rn, XZR)>;
8934+
8935+
let Predicates = [IsBE] in {
8936+
def : St1PostPat<v16i8, ST1Onev16b_POST, 16>;
8937+
def : St1PostPat<v8i16, ST1Onev8h_POST, 16>;
8938+
def : St1PostPat<v4i32, ST1Onev4s_POST, 16>;
8939+
def : St1PostPat<v2i64, ST1Onev2d_POST, 16>;
8940+
def : St1PostPat<v8i8, ST1Onev8b_POST, 8>;
8941+
def : St1PostPat<v4i16, ST1Onev4h_POST, 8>;
8942+
def : St1PostPat<v2i32, ST1Onev2s_POST, 8>;
8943+
def : St1PostPat<v1i64, ST1Onev1d_POST, 8>;
8944+
}
8945+
89288946
//---
89298947
// Single-element
89308948
//---
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=aarch64 < %s -o - | FileCheck %s --check-prefixes=CHECK-LE
3+
; RUN: llc -mtriple=aarch64_be < %s -o - | FileCheck %s --check-prefixes=CHECK-BE
4+
5+
; Check that we use the correct offset mode for vector loads and stores, and in
6+
; particular for big-endian we use ld1/st1 which only allows postindex immediate
7+
; offset of the same size as the memory access size.
8+
; FIXME: Currently we fail to make use of postindex register offset ld1/st1.
9+
10+
define [2 x ptr] @postidx_same_size([2 x ptr] %x) {
11+
; CHECK-LE-LABEL: postidx_same_size:
12+
; CHECK-LE: // %bb.0: // %entry
13+
; CHECK-LE-NEXT: ldr d0, [x0], #8
14+
; CHECK-LE-NEXT: str d0, [x1], #8
15+
; CHECK-LE-NEXT: ret
16+
;
17+
; CHECK-BE-LABEL: postidx_same_size:
18+
; CHECK-BE: // %bb.0: // %entry
19+
; CHECK-BE-NEXT: ld1 { v0.4h }, [x0], #8
20+
; CHECK-BE-NEXT: st1 { v0.4h }, [x1], #8
21+
; CHECK-BE-NEXT: ret
22+
entry:
23+
%ldptr = extractvalue [2 x ptr] %x, 0
24+
%stptr = extractvalue [2 x ptr] %x, 1
25+
%val = load <4 x i16>, ptr %ldptr, align 2
26+
store <4 x i16> %val, ptr %stptr, align 2
27+
%add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8
28+
%add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8
29+
%ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0
30+
%ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1
31+
ret [2 x ptr] %ret2
32+
}
33+
34+
define [2 x ptr] @preidx_same_size([2 x ptr] %x) {
35+
; CHECK-LE-LABEL: preidx_same_size:
36+
; CHECK-LE: // %bb.0: // %entry
37+
; CHECK-LE-NEXT: ldr d0, [x0, #8]!
38+
; CHECK-LE-NEXT: str d0, [x1, #8]!
39+
; CHECK-LE-NEXT: ret
40+
;
41+
; CHECK-BE-LABEL: preidx_same_size:
42+
; CHECK-BE: // %bb.0: // %entry
43+
; CHECK-BE-NEXT: add x0, x0, #8
44+
; CHECK-BE-NEXT: add x1, x1, #8
45+
; CHECK-BE-NEXT: ld1 { v0.4h }, [x0]
46+
; CHECK-BE-NEXT: st1 { v0.4h }, [x1]
47+
; CHECK-BE-NEXT: ret
48+
entry:
49+
%ldptr = extractvalue [2 x ptr] %x, 0
50+
%stptr = extractvalue [2 x ptr] %x, 1
51+
%add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8
52+
%add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8
53+
%val = load <4 x i16>, ptr %add.ldptr, align 2
54+
store <4 x i16> %val, ptr %add.stptr, align 2
55+
%ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0
56+
%ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1
57+
ret [2 x ptr] %ret2
58+
}
59+
60+
define [2 x ptr] @postidx_different_size([2 x ptr] %x) {
61+
; CHECK-LE-LABEL: postidx_different_size:
62+
; CHECK-LE: // %bb.0: // %entry
63+
; CHECK-LE-NEXT: ldr d0, [x0], #16
64+
; CHECK-LE-NEXT: str d0, [x1], #16
65+
; CHECK-LE-NEXT: ret
66+
;
67+
; CHECK-BE-LABEL: postidx_different_size:
68+
; CHECK-BE: // %bb.0: // %entry
69+
; CHECK-BE-NEXT: ld1 { v0.4h }, [x0]
70+
; CHECK-BE-NEXT: mov x8, x1
71+
; CHECK-BE-NEXT: add x0, x0, #16
72+
; CHECK-BE-NEXT: add x1, x1, #16
73+
; CHECK-BE-NEXT: st1 { v0.4h }, [x8]
74+
; CHECK-BE-NEXT: ret
75+
entry:
76+
%ldptr = extractvalue [2 x ptr] %x, 0
77+
%stptr = extractvalue [2 x ptr] %x, 1
78+
%val = load <4 x i16>, ptr %ldptr, align 2
79+
store <4 x i16> %val, ptr %stptr, align 2
80+
%add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16
81+
%add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16
82+
%ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0
83+
%ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1
84+
ret [2 x ptr] %ret2
85+
}
86+
87+
define [2 x ptr] @preidx_different_size([2 x ptr] %x) {
88+
; CHECK-LE-LABEL: preidx_different_size:
89+
; CHECK-LE: // %bb.0: // %entry
90+
; CHECK-LE-NEXT: ldr d0, [x0, #16]!
91+
; CHECK-LE-NEXT: str d0, [x1, #16]!
92+
; CHECK-LE-NEXT: ret
93+
;
94+
; CHECK-BE-LABEL: preidx_different_size:
95+
; CHECK-BE: // %bb.0: // %entry
96+
; CHECK-BE-NEXT: add x0, x0, #16
97+
; CHECK-BE-NEXT: add x1, x1, #16
98+
; CHECK-BE-NEXT: ld1 { v0.4h }, [x0]
99+
; CHECK-BE-NEXT: st1 { v0.4h }, [x1]
100+
; CHECK-BE-NEXT: ret
101+
entry:
102+
%ldptr = extractvalue [2 x ptr] %x, 0
103+
%stptr = extractvalue [2 x ptr] %x, 1
104+
%add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16
105+
%add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16
106+
%val = load <4 x i16>, ptr %add.ldptr, align 2
107+
store <4 x i16> %val, ptr %add.stptr, align 2
108+
%ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0
109+
%ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1
110+
ret [2 x ptr] %ret2
111+
}
112+
113+
define [2 x ptr] @postidx_reg([2 x ptr] %x, i64 %off) {
114+
; CHECK-LE-LABEL: postidx_reg:
115+
; CHECK-LE: // %bb.0: // %entry
116+
; CHECK-LE-NEXT: ldr d0, [x0]
117+
; CHECK-LE-NEXT: mov x8, x1
118+
; CHECK-LE-NEXT: add x0, x0, x2
119+
; CHECK-LE-NEXT: add x1, x1, x2
120+
; CHECK-LE-NEXT: str d0, [x8]
121+
; CHECK-LE-NEXT: ret
122+
;
123+
; CHECK-BE-LABEL: postidx_reg:
124+
; CHECK-BE: // %bb.0: // %entry
125+
; CHECK-BE-NEXT: ld1 { v0.4h }, [x0]
126+
; CHECK-BE-NEXT: mov x8, x1
127+
; CHECK-BE-NEXT: add x0, x0, x2
128+
; CHECK-BE-NEXT: add x1, x1, x2
129+
; CHECK-BE-NEXT: st1 { v0.4h }, [x8]
130+
; CHECK-BE-NEXT: ret
131+
entry:
132+
%ldptr = extractvalue [2 x ptr] %x, 0
133+
%stptr = extractvalue [2 x ptr] %x, 1
134+
%val = load <4 x i16>, ptr %ldptr, align 2
135+
store <4 x i16> %val, ptr %stptr, align 2
136+
%add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off
137+
%add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off
138+
%ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0
139+
%ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1
140+
ret [2 x ptr] %ret2
141+
}
142+
143+
define [2 x ptr] @preidx_reg([2 x ptr] %x, i64 %off) {
144+
; CHECK-LE-LABEL: preidx_reg:
145+
; CHECK-LE: // %bb.0: // %entry
146+
; CHECK-LE-NEXT: mov x8, x1
147+
; CHECK-LE-NEXT: ldr d0, [x0, x2]
148+
; CHECK-LE-NEXT: add x0, x0, x2
149+
; CHECK-LE-NEXT: add x1, x1, x2
150+
; CHECK-LE-NEXT: str d0, [x8, x2]
151+
; CHECK-LE-NEXT: ret
152+
;
153+
; CHECK-BE-LABEL: preidx_reg:
154+
; CHECK-BE: // %bb.0: // %entry
155+
; CHECK-BE-NEXT: add x0, x0, x2
156+
; CHECK-BE-NEXT: add x1, x1, x2
157+
; CHECK-BE-NEXT: ld1 { v0.4h }, [x0]
158+
; CHECK-BE-NEXT: st1 { v0.4h }, [x1]
159+
; CHECK-BE-NEXT: ret
160+
entry:
161+
%ldptr = extractvalue [2 x ptr] %x, 0
162+
%stptr = extractvalue [2 x ptr] %x, 1
163+
%add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off
164+
%add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off
165+
%val = load <4 x i16>, ptr %add.ldptr, align 2
166+
store <4 x i16> %val, ptr %add.stptr, align 2
167+
%ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0
168+
%ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1
169+
ret [2 x ptr] %ret2
170+
}

0 commit comments

Comments
 (0)