diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 6aa8cd4f0232a..1387a224fa660 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -665,6 +665,10 @@ static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { } } +static bool isMemOpOrPrefetch(SDNode *N) { + return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH; +} + /// Determine whether it is worth it to fold SHL into the addressing /// mode. static bool isWorthFoldingSHL(SDValue V) { @@ -682,9 +686,9 @@ static bool isWorthFoldingSHL(SDValue V) { // computation, since the computation will be kept. const SDNode *Node = V.getNode(); for (SDNode *UI : Node->users()) - if (!isa<MemSDNode>(*UI)) + if (!isMemOpOrPrefetch(UI)) for (SDNode *UII : UI->users()) - if (!isa<MemSDNode>(*UII)) + if (!isMemOpOrPrefetch(UII)) return false; return true; } @@ -1248,7 +1252,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, // computation, since the computation will be kept. const SDNode *Node = N.getNode(); for (SDNode *UI : Node->users()) { - if (!isa<MemSDNode>(*UI)) + if (!isMemOpOrPrefetch(UI)) return false; } @@ -1332,7 +1336,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, // computation, since the computation will be kept. 
const SDNode *Node = N.getNode(); for (SDNode *UI : Node->users()) { - if (!isa<MemSDNode>(*UI)) + if (!isMemOpOrPrefetch(UI)) return false; } diff --git a/llvm/test/CodeGen/AArch64/arm64-prefetch-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-prefetch-addrmode.ll new file mode 100644 index 0000000000000..44202ffba6374 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-prefetch-addrmode.ll @@ -0,0 +1,147 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64 < %s | FileCheck %s + +define void @imm9(ptr %object) { +; CHECK-LABEL: imm9: +; CHECK: // %bb.0: +; CHECK-NEXT: prfum pldl1keep, [x0, #7] +; CHECK-NEXT: ret + %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 7 + call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1) + ret void +} + +define void @imm9_max(ptr %object) { +; CHECK-LABEL: imm9_max: +; CHECK: // %bb.0: +; CHECK-NEXT: prfum pldl1keep, [x0, #255] +; CHECK-NEXT: ret + %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 255 + call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1) + ret void +} + +define void @imm9_above_max(ptr %object) { +; CHECK-LABEL: imm9_above_max: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, #257 +; CHECK-NEXT: prfm pldl1keep, [x8] +; CHECK-NEXT: ret + %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 257 ; 256 would use the imm12 mode + call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1) + ret void +} + +define void @imm9_min(ptr %object) { +; CHECK-LABEL: imm9_min: +; CHECK: // %bb.0: +; CHECK-NEXT: prfum pldl1keep, [x0, #-256] +; CHECK-NEXT: ret + %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 -256 + call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1) + ret void +} + +define void @imm9_below_min(ptr %object) { +; CHECK-LABEL: imm9_below_min: +; CHECK: // %bb.0: +; CHECK-NEXT: sub x8, x0, #257 +; CHECK-NEXT: prfm pldl1keep, [x8] +; CHECK-NEXT: ret + %incdec.ptr = getelementptr 
inbounds i8, ptr %object, i64 -257 + call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1) + ret void +} + +define void @imm12(ptr %object) { +; CHECK-LABEL: imm12: +; CHECK: // %bb.0: +; CHECK-NEXT: prfm pldl1keep, [x0, #8] +; CHECK-NEXT: ret + %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 1 + call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1) + ret void +} + +define void @imm12_max(ptr %object) { +; CHECK-LABEL: imm12_max: +; CHECK: // %bb.0: +; CHECK-NEXT: prfm pldl1keep, [x0, #32760] +; CHECK-NEXT: ret + %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 4095 + call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1) + ret void +} + +define void @imm12_above_max(ptr %object) { +; CHECK-LABEL: imm12_above_max: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #32768 // =0x8000 +; CHECK-NEXT: prfm pldl1keep, [x0, x8] +; CHECK-NEXT: ret + %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 4096 + call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1) + ret void +} + +define void @reg(ptr %object, i64 %a) { +; CHECK-LABEL: reg: +; CHECK: // %bb.0: +; CHECK-NEXT: prfm pldl1keep, [x0, x1] +; CHECK-NEXT: ret + %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 %a + call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1) + ret void +} + +define void @reg_shift(ptr %object, i64 %a) { +; CHECK-LABEL: reg_shift: +; CHECK: // %bb.0: +; CHECK-NEXT: prfm pldl1keep, [x0, x1, lsl #3] +; CHECK-NEXT: ret + %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 %a + call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1) + ret void +} + +define void @reg_sext(ptr %object, i32 %a) { +; CHECK-LABEL: reg_sext: +; CHECK: // %bb.0: +; CHECK-NEXT: prfm pldl1keep, [x0, w1, sxtw] +; CHECK-NEXT: ret + %incdec.ptr = getelementptr inbounds i8, ptr %object, i32 %a + call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1) + ret void +} + +define void @reg_sext_shift(ptr %object, i32 
%a) { +; CHECK-LABEL: reg_sext_shift: +; CHECK: // %bb.0: +; CHECK-NEXT: prfm pldl1keep, [x0, w1, sxtw #3] +; CHECK-NEXT: ret + %incdec.ptr = getelementptr inbounds i64, ptr %object, i32 %a + call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1) + ret void +} + +define void @reg_zext(ptr %object, i32 %a) { +; CHECK-LABEL: reg_zext: +; CHECK: // %bb.0: +; CHECK-NEXT: prfm pldl1keep, [x0, w1, uxtw] +; CHECK-NEXT: ret + %a.zext = zext i32 %a to i64 + %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 %a.zext + call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1) + ret void +} + +define void @reg_zext_shift(ptr %object, i32 %a) { +; CHECK-LABEL: reg_zext_shift: +; CHECK: // %bb.0: +; CHECK-NEXT: prfm pldl1keep, [x0, w1, uxtw #3] +; CHECK-NEXT: ret + %a.zext = zext i32 %a to i64 + %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 %a.zext + call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1) + ret void +}