diff --git a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp index 64f21c4cb2297..5232116ad7090 100644 --- a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp +++ b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp @@ -192,6 +192,7 @@ static bool isCandidateStore(const MachineInstr &MI, const MachineOperand &MO) { switch (MI.getOpcode()) { default: return false; + // STR case AArch64::STRBBui: case AArch64::STRHHui: case AArch64::STRBui: @@ -201,12 +202,41 @@ static bool isCandidateStore(const MachineInstr &MI, const MachineOperand &MO) { case AArch64::STRSui: case AArch64::STRDui: case AArch64::STRQui: + // STUR + case AArch64::STURBi: + case AArch64::STURBBi: + case AArch64::STURHi: + case AArch64::STURHHi: + case AArch64::STURWi: + case AArch64::STURXi: + case AArch64::STURSi: + case AArch64::STURDi: + case AArch64::STURQi: // We can only optimize the index operand. // In case we have str xA, [xA, #imm], this is two different uses // of xA and we cannot fold, otherwise the xA stored may be wrong, // even if #imm == 0. return MO.getOperandNo() == 1 && MI.getOperand(0).getReg() != MI.getOperand(1).getReg(); + // STP + case AArch64::STPWi: + case AArch64::STPXi: + case AArch64::STPSi: + case AArch64::STPDi: + case AArch64::STPQi: + // STNP + case AArch64::STNPWi: + case AArch64::STNPXi: + case AArch64::STNPSi: + case AArch64::STNPDi: + case AArch64::STNPQi: + // We can only optimize the index operand. + // In case we have stp xA, xB, [xA, #imm] or stp xA, xB, [xB, #imm], this is + // two different uses of xA or xB and we cannot fold, otherwise the xA or xB + // stored may be wrong, even if #imm == 0. 
+ return MO.getOperandNo() == 2 && + MI.getOperand(0).getReg() != MI.getOperand(2).getReg() && + MI.getOperand(1).getReg() != MI.getOperand(2).getReg(); } } @@ -216,6 +246,7 @@ static bool isCandidateLoad(const MachineInstr &MI) { switch (MI.getOpcode()) { default: return false; + // LDR case AArch64::LDRSBWui: case AArch64::LDRSBXui: case AArch64::LDRSHWui: @@ -228,11 +259,40 @@ static bool isCandidateLoad(const MachineInstr &MI) { case AArch64::LDRSui: case AArch64::LDRDui: case AArch64::LDRQui: + // LDUR + case AArch64::LDURBBi: + case AArch64::LDURBi: + case AArch64::LDURDi: + case AArch64::LDURHHi: + case AArch64::LDURHi: + case AArch64::LDURQi: + case AArch64::LDURSBWi: + case AArch64::LDURSBXi: + case AArch64::LDURSHWi: + case AArch64::LDURSHXi: + case AArch64::LDURSWi: + case AArch64::LDURSi: + case AArch64::LDURWi: + case AArch64::LDURXi: return !(MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT); + // LDP + case AArch64::LDPSi: + case AArch64::LDPSWi: + case AArch64::LDPDi: + case AArch64::LDPQi: + case AArch64::LDPWi: + case AArch64::LDPXi: + // LDNP + case AArch64::LDNPSi: + case AArch64::LDNPDi: + case AArch64::LDNPQi: + case AArch64::LDNPWi: + case AArch64::LDNPXi: + return !(MI.getOperand(3).getTargetFlags() & AArch64II::MO_GOT); } } -/// Check whether the given instruction can load a litteral. +/// Check whether the given instruction can load a literal. 
static bool supportLoadFromLiteral(const MachineInstr &MI) { switch (MI.getOpcode()) { default: diff --git a/llvm/test/CodeGen/AArch64/arm64-collect-loh-str.ll b/llvm/test/CodeGen/AArch64/arm64-collect-loh-str.ll index acc0df12a94e8..f8b469efe5afc 100644 --- a/llvm/test/CodeGen/AArch64/arm64-collect-loh-str.ll +++ b/llvm/test/CodeGen/AArch64/arm64-collect-loh-str.ll @@ -1,5 +1,5 @@ -; RUN: llc -o - %s -mtriple=arm64-apple-ios -O2 | FileCheck %s -; RUN: llc -o - %s -mtriple=arm64_32-apple-ios -O2 | FileCheck %s +; RUN: llc -o - %s -mtriple=arm64-apple-ios -O2 | FileCheck %s --implicit-check-not=AdrpAddStr +; RUN: llc -o - %s -mtriple=arm64_32-apple-ios -O2 | FileCheck %s --implicit-check-not=AdrpAddStr ; Test case for . ; AdrpAddStr cannot be used when the store uses same ; register as address and value. Indeed, the related @@ -7,18 +7,26 @@ ; at least provide a wrong one (with the offset folded ; into the definition). -%struct.anon = type { ptr, ptr } +@A = internal global i32 0, align 4 -@pptp_wan_head = internal global %struct.anon zeroinitializer, align 8 - -; CHECK-LABEL: _pptp_wan_init -; CHECK: ret -; CHECK-NOT: AdrpAddStr -define i32 @pptp_wan_init() { +define void @str() { entry: - store ptr null, ptr @pptp_wan_head, align 8 - store ptr @pptp_wan_head, ptr getelementptr inbounds (%struct.anon, ptr @pptp_wan_head, i64 0, i32 1), align 8 - ret i32 0 + store ptr @A, ptr @A, align 4 + ret void } +define void @stp0(i64 %t) { +entry: + %addr = getelementptr inbounds i64, ptr @A, i32 1 + store ptr @A, ptr @A, align 4 + store i64 %t, ptr %addr, align 4 + ret void +} +define void @stp1(i64 %t) { +entry: + %addr = getelementptr inbounds i64, ptr @A, i32 1 + store i64 %t, ptr @A, align 4 + store ptr @A, ptr %addr, align 4 + ret void +} diff --git a/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll b/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll index 7f2bebf584d8f..6ac899fb41896 100644 --- a/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll +++ 
b/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll @@ -71,6 +71,34 @@ define i32 @getC() { ret i32 %res } +; CHECK-LABEL: _getCPair +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _C@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldp q0, q1, [x[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define <8 x i32> @getCPair() { + %res = load <8 x i32>, ptr @C, align 4 + ret <8 x i32> %res +} + +; CHECK-LABEL: _getCNontemporalPair +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _C@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldnp q0, q1, [x[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define <8 x i32> @getCNontemporalPair() { + %res = load <8 x i32>, ptr @C, align 4, !nontemporal !0 + ret <8 x i32> %res +} + ; LDRSW supports loading from a literal. ; Make sure we emit AdrpLdrGotLdr for those. 
; CHECK-LABEL: _getSExtC @@ -126,6 +154,36 @@ entry: ret void } +; CHECK-LABEL: _setCPair +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _C@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: stp q0, q1, [x[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define void @setCPair(<8 x i32> %t) { +entry: + store <8 x i32> %t, ptr @C, align 4 + ret void +} + +; CHECK-LABEL: _setCNontemporalPair +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _C@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: stnp q0, q1, [x[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define void @setCNontemporalPair(<8 x i32> %t) { +entry: + store <8 x i32> %t, ptr @C, align 4, !nontemporal !0 + ret void +} + ; Perform the same tests for internal global and a displacement ; in the addressing mode. ; Indeed we will get an ADD for those instead of LOADGot. 
@@ -148,6 +206,51 @@ define i32 @getInternalCPlus4() { ret i32 %res } +; CHECK-LABEL: _getInternalCUnscaled +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldur w0, [[[ADDGOT_REG]], #-4] +; CHECK-NEXT: ret +; CHECK: .loh AdrpAddLdr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]] +define i32 @getInternalCUnscaled() { + %addr = getelementptr inbounds i32, ptr @InternalC, i32 -1 + %res = load i32, ptr %addr, align 4 + ret i32 %res +} + +; CHECK-LABEL: _getInternalCPair +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldp q0, q1, [[[ADDGOT_REG]], #16] +; CHECK-NEXT: ret +; CHECK: .loh AdrpAddLdr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]] +define <8 x i32> @getInternalCPair() { + %addr = getelementptr inbounds i32, ptr @InternalC, i32 4 + %res = load <8 x i32>, ptr %addr, align 4 + ret <8 x i32> %res +} + +; CHECK-LABEL: _getInternalCNontemporalPair +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldnp q0, q1, [[[ADDGOT_REG]], #16] +; CHECK-NEXT: ret +; CHECK: .loh AdrpAddLdr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]] +define <8 x i32> @getInternalCNontemporalPair() { + %addr = getelementptr inbounds i32, ptr @InternalC, i32 4 + %res = load <8 x i32>, ptr %addr, align 4, !nontemporal !0 + ret <8 x i32> %res +} + ; LDRSW supports loading from a literal. ; Make sure we emit AdrpLdrGotLdr for those. 
; CHECK-LABEL: _getSExtInternalCPlus4 @@ -206,6 +309,54 @@ entry: ret void } +; CHECK-LABEL: _setInternalCUnscaled +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: stur w0, [[[ADDGOT_REG]], #-4] +; CHECK-NEXT: ret +; CHECK: .loh AdrpAddStr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]] +define void @setInternalCUnscaled(i32 %t) { +entry: + %addr = getelementptr inbounds i32, ptr @InternalC, i32 -1 + store i32 %t, ptr %addr, align 4 + ret void +} + +; CHECK-LABEL: _setInternalCPair +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: stp q0, q1, [[[ADDGOT_REG]], #16] +; CHECK-NEXT: ret +; CHECK: .loh AdrpAddStr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]] +define void @setInternalCPair(<8 x i32> %t) { +entry: + %addr = getelementptr inbounds i32, ptr @InternalC, i32 4 + store <8 x i32> %t, ptr %addr, align 4 + ret void +} + +; CHECK-LABEL: _setInternalCNontemporalPair +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: stnp q0, q1, [[[ADDGOT_REG]], #16] +; CHECK-NEXT: ret +; CHECK: .loh AdrpAddStr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]] +define void @setInternalCNontemporalPair(<8 x i32> %t) { +entry: + %addr = getelementptr inbounds i32, ptr @InternalC, i32 4 + store <8 x i32> %t, ptr %addr, align 4, !nontemporal !0 + ret void +} + ; Check that we catch AdrpAddLdr case when we have a simple chain: ; adrp 
-> ldr. ; CHECK-LABEL: _getInternalC @@ -679,4 +830,6 @@ if.end.i: } declare void @callee(ptr nocapture readonly, ...) +!0 = !{ i32 1 } + attributes #0 = { "target-cpu"="cyclone" }