Skip to content

Commit d04840c

Browse files
committed
[LOH] Emit hints for LDP/STP instructions
1 parent ee46630 commit d04840c

File tree

3 files changed

+230
-13
lines changed

3 files changed

+230
-13
lines changed

llvm/lib/Target/AArch64/AArch64CollectLOH.cpp

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ static bool isCandidateStore(const MachineInstr &MI, const MachineOperand &MO) {
192192
switch (MI.getOpcode()) {
193193
default:
194194
return false;
195+
// STR
195196
case AArch64::STRBBui:
196197
case AArch64::STRHHui:
197198
case AArch64::STRBui:
@@ -201,12 +202,37 @@ static bool isCandidateStore(const MachineInstr &MI, const MachineOperand &MO) {
201202
case AArch64::STRSui:
202203
case AArch64::STRDui:
203204
case AArch64::STRQui:
205+
// STUR
206+
case AArch64::STURBi:
207+
case AArch64::STURBBi:
208+
case AArch64::STURHi:
209+
case AArch64::STURHHi:
210+
case AArch64::STURWi:
211+
case AArch64::STURXi:
212+
case AArch64::STURSi:
213+
case AArch64::STURDi:
214+
case AArch64::STURQi:
204215
// We can only optimize the index operand.
205216
// In case we have str xA, [xA, #imm], this is two different uses
206217
// of xA and we cannot fold, otherwise the xA stored may be wrong,
207218
// even if #imm == 0.
208219
return MO.getOperandNo() == 1 &&
209220
MI.getOperand(0).getReg() != MI.getOperand(1).getReg();
221+
// STP
222+
case AArch64::STPWi:
223+
case AArch64::STPXi:
224+
case AArch64::STPSi:
225+
case AArch64::STPDi:
226+
case AArch64::STPQi:
227+
// STNP
228+
case AArch64::STNPWi:
229+
case AArch64::STNPXi:
230+
case AArch64::STNPSi:
231+
case AArch64::STNPDi:
232+
case AArch64::STNPQi:
233+
return MO.getOperandNo() == 2 &&
234+
MI.getOperand(0).getReg() != MI.getOperand(2).getReg() &&
235+
MI.getOperand(1).getReg() != MI.getOperand(2).getReg();
210236
}
211237
}
212238

@@ -216,6 +242,7 @@ static bool isCandidateLoad(const MachineInstr &MI) {
216242
switch (MI.getOpcode()) {
217243
default:
218244
return false;
245+
// LDR
219246
case AArch64::LDRSBWui:
220247
case AArch64::LDRSBXui:
221248
case AArch64::LDRSHWui:
@@ -228,11 +255,40 @@ static bool isCandidateLoad(const MachineInstr &MI) {
228255
case AArch64::LDRSui:
229256
case AArch64::LDRDui:
230257
case AArch64::LDRQui:
258+
// LDUR
259+
case AArch64::LDURBBi:
260+
case AArch64::LDURBi:
261+
case AArch64::LDURDi:
262+
case AArch64::LDURHHi:
263+
case AArch64::LDURHi:
264+
case AArch64::LDURQi:
265+
case AArch64::LDURSBWi:
266+
case AArch64::LDURSBXi:
267+
case AArch64::LDURSHWi:
268+
case AArch64::LDURSHXi:
269+
case AArch64::LDURSWi:
270+
case AArch64::LDURSi:
271+
case AArch64::LDURWi:
272+
case AArch64::LDURXi:
231273
return !(MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT);
274+
// LDP
275+
case AArch64::LDPSi:
276+
case AArch64::LDPSWi:
277+
case AArch64::LDPDi:
278+
case AArch64::LDPQi:
279+
case AArch64::LDPWi:
280+
case AArch64::LDPXi:
281+
// LDNP
282+
case AArch64::LDNPSi:
283+
case AArch64::LDNPDi:
284+
case AArch64::LDNPQi:
285+
case AArch64::LDNPWi:
286+
case AArch64::LDNPXi:
287+
return !(MI.getOperand(3).getTargetFlags() & AArch64II::MO_GOT);
232288
}
233289
}
234290

235-
/// Check whether the given instruction can load a litteral.
291+
/// Check whether the given instruction can load a literal.
236292
static bool supportLoadFromLiteral(const MachineInstr &MI) {
237293
switch (MI.getOpcode()) {
238294
default:
Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,32 @@
1-
; RUN: llc -o - %s -mtriple=arm64-apple-ios -O2 | FileCheck %s
2-
; RUN: llc -o - %s -mtriple=arm64_32-apple-ios -O2 | FileCheck %s
1+
; RUN: llc -o - %s -mtriple=arm64-apple-ios -O2 | FileCheck %s --implicit-check-not=AdrpAddStr
2+
; RUN: llc -o - %s -mtriple=arm64_32-apple-ios -O2 | FileCheck %s --implicit-check-not=AdrpAddStr
33
; Test case for <rdar://problem/15942912>.
44
; AdrpAddStr cannot be used when the store uses the same
55
; register as address and value. Indeed, the related
66
; optimization, if applied, may completely remove the definition or
77
; at least provide a wrong one (with the offset folded
88
; into the definition).
99

10-
%struct.anon = type { ptr, ptr }
10+
@A = internal global i32 0, align 4
1111

12-
@pptp_wan_head = internal global %struct.anon zeroinitializer, align 8
13-
14-
; CHECK-LABEL: _pptp_wan_init
15-
; CHECK: ret
16-
; CHECK-NOT: AdrpAddStr
17-
define i32 @pptp_wan_init() {
12+
define void @str() {
1813
entry:
19-
store ptr null, ptr @pptp_wan_head, align 8
20-
store ptr @pptp_wan_head, ptr getelementptr inbounds (%struct.anon, ptr @pptp_wan_head, i64 0, i32 1), align 8
21-
ret i32 0
14+
store ptr @A, ptr @A, align 4
15+
ret void
2216
}
2317

18+
define void @stp0(i64 %t) {
19+
entry:
20+
%addr = getelementptr inbounds i64, ptr @A, i32 1
21+
store ptr @A, ptr @A, align 4
22+
store i64 %t, ptr %addr, align 4
23+
ret void
24+
}
2425

26+
define void @stp1(i64 %t) {
27+
entry:
28+
%addr = getelementptr inbounds i64, ptr @A, i32 1
29+
store i64 %t, ptr @A, align 4
30+
store ptr @A, ptr %addr, align 4
31+
ret void
32+
}

llvm/test/CodeGen/AArch64/arm64-collect-loh.ll

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,34 @@ define i32 @getC() {
7171
ret i32 %res
7272
}
7373

74+
; CHECK-LABEL: _getCPair
75+
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
76+
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE
77+
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
78+
; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _C@GOTPAGEOFF]
79+
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
80+
; CHECK-NEXT: ldp q0, q1, [x[[LDRGOT_REG]]]
81+
; CHECK-NEXT: ret
82+
; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]]
83+
define <8 x i32> @getCPair() {
84+
%res = load <8 x i32>, ptr @C, align 4
85+
ret <8 x i32> %res
86+
}
87+
88+
; CHECK-LABEL: _getCNontemporalPair
89+
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
90+
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE
91+
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
92+
; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _C@GOTPAGEOFF]
93+
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
94+
; CHECK-NEXT: ldnp q0, q1, [x[[LDRGOT_REG]]]
95+
; CHECK-NEXT: ret
96+
; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]]
97+
define <8 x i32> @getCNontemporalPair() {
98+
%res = load <8 x i32>, ptr @C, align 4, !nontemporal !0
99+
ret <8 x i32> %res
100+
}
101+
74102
; LDRSW supports loading from a literal.
75103
; Make sure we emit AdrpLdrGotLdr for those.
76104
; CHECK-LABEL: _getSExtC
@@ -126,6 +154,36 @@ entry:
126154
ret void
127155
}
128156

157+
; CHECK-LABEL: _setCPair
158+
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
159+
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE
160+
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
161+
; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _C@GOTPAGEOFF]
162+
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
163+
; CHECK-NEXT: stp q0, q1, [x[[LDRGOT_REG]]]
164+
; CHECK-NEXT: ret
165+
; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]]
166+
define void @setCPair(<8 x i32> %t) {
167+
entry:
168+
store <8 x i32> %t, ptr @C, align 4
169+
ret void
170+
}
171+
172+
; CHECK-LABEL: _setCNontemporalPair
173+
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
174+
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE
175+
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
176+
; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _C@GOTPAGEOFF]
177+
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
178+
; CHECK-NEXT: stnp q0, q1, [x[[LDRGOT_REG]]]
179+
; CHECK-NEXT: ret
180+
; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]]
181+
define void @setCNontemporalPair(<8 x i32> %t) {
182+
entry:
183+
store <8 x i32> %t, ptr @C, align 4, !nontemporal !0
184+
ret void
185+
}
186+
129187
; Perform the same tests for internal global and a displacement
130188
; in the addressing mode.
131189
; Indeed we will get an ADD for those instead of LOADGot.
@@ -148,6 +206,51 @@ define i32 @getInternalCPlus4() {
148206
ret i32 %res
149207
}
150208

209+
; CHECK-LABEL: _getInternalCUnscaled
210+
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
211+
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE
212+
; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]:
213+
; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF
214+
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
215+
; CHECK-NEXT: ldur w0, [[[ADDGOT_REG]], #-4]
216+
; CHECK-NEXT: ret
217+
; CHECK: .loh AdrpAddLdr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]]
218+
define i32 @getInternalCUnscaled() {
219+
%addr = getelementptr inbounds i32, ptr @InternalC, i32 -1
220+
%res = load i32, ptr %addr, align 4
221+
ret i32 %res
222+
}
223+
224+
; CHECK-LABEL: _getInternalCPair
225+
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
226+
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE
227+
; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]:
228+
; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF
229+
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
230+
; CHECK-NEXT: ldp q0, q1, [[[ADDGOT_REG]], #16]
231+
; CHECK-NEXT: ret
232+
; CHECK: .loh AdrpAddLdr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]]
233+
define <8 x i32> @getInternalCPair() {
234+
%addr = getelementptr inbounds i32, ptr @InternalC, i32 4
235+
%res = load <8 x i32>, ptr %addr, align 4
236+
ret <8 x i32> %res
237+
}
238+
239+
; CHECK-LABEL: _getInternalCNontemporalPair
240+
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
241+
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE
242+
; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]:
243+
; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF
244+
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
245+
; CHECK-NEXT: ldnp q0, q1, [[[ADDGOT_REG]], #16]
246+
; CHECK-NEXT: ret
247+
; CHECK: .loh AdrpAddLdr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]]
248+
define <8 x i32> @getInternalCNontemporalPair() {
249+
%addr = getelementptr inbounds i32, ptr @InternalC, i32 4
250+
%res = load <8 x i32>, ptr %addr, align 4, !nontemporal !0
251+
ret <8 x i32> %res
252+
}
253+
151254
; LDRSW supports loading from a literal.
152255
; Make sure we emit AdrpLdrGotLdr for those.
153256
; CHECK-LABEL: _getSExtInternalCPlus4
@@ -206,6 +309,54 @@ entry:
206309
ret void
207310
}
208311

312+
; CHECK-LABEL: _setInternalCUnscaled
313+
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
314+
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE
315+
; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]:
316+
; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF
317+
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
318+
; CHECK-NEXT: stur w0, [[[ADDGOT_REG]], #-4]
319+
; CHECK-NEXT: ret
320+
; CHECK: .loh AdrpAddStr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]]
321+
define void @setInternalCUnscaled(i32 %t) {
322+
entry:
323+
%addr = getelementptr inbounds i32, ptr @InternalC, i32 -1
324+
store i32 %t, ptr %addr, align 4
325+
ret void
326+
}
327+
328+
; CHECK-LABEL: _setInternalCPair
329+
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
330+
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE
331+
; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]:
332+
; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF
333+
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
334+
; CHECK-NEXT: stp q0, q1, [[[ADDGOT_REG]], #16]
335+
; CHECK-NEXT: ret
336+
; CHECK: .loh AdrpAddStr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]]
337+
define void @setInternalCPair(<8 x i32> %t) {
338+
entry:
339+
%addr = getelementptr inbounds i32, ptr @InternalC, i32 4
340+
store <8 x i32> %t, ptr %addr, align 4
341+
ret void
342+
}
343+
344+
; CHECK-LABEL: _setInternalCNontemporalPair
345+
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
346+
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE
347+
; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]:
348+
; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF
349+
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
350+
; CHECK-NEXT: stnp q0, q1, [[[ADDGOT_REG]], #16]
351+
; CHECK-NEXT: ret
352+
; CHECK: .loh AdrpAddStr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]]
353+
define void @setInternalCNontemporalPair(<8 x i32> %t) { ; no leading '_' in the IR name: the CHECK-LABEL above already carries the underscore prefix seen in the assembly output
354+
entry:
355+
%addr = getelementptr inbounds i32, ptr @InternalC, i32 4 ; 4 x i32 = byte offset 16, the #16 expected by the CHECK lines
356+
store <8 x i32> %t, ptr %addr, align 4, !nontemporal !0 ; nontemporal 32-byte store; the CHECK lines expect an stnp of q0, q1
357+
ret void
358+
}
359+
209360
; Check that we catch AdrpAddLdr case when we have a simple chain:
210361
; adrp -> ldr.
211362
; CHECK-LABEL: _getInternalC
@@ -679,4 +830,6 @@ if.end.i:
679830
}
680831
declare void @callee(ptr nocapture readonly, ...)
681832

833+
!0 = !{ i32 1 }
834+
682835
attributes #0 = { "target-cpu"="cyclone" }

0 commit comments

Comments
 (0)