diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 009d69b2b9433..da617b7e19266 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -1583,6 +1583,8 @@ bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) { EVT DstVT = N->getValueType(0); ISD::MemIndexedMode AM = LD->getAddressingMode(); bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; + ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset()); + int OffsetVal = (int)OffsetOp->getZExtValue(); // We're not doing validity checking here. That was done when checking // if we should mark the load as indexed or not. We're just selecting @@ -1637,18 +1639,58 @@ bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) { Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; } else if (VT == MVT::f32) { Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; - } else if (VT == MVT::f64 || VT.is64BitVector()) { + } else if (VT == MVT::f64 || + (VT.is64BitVector() && Subtarget->isLittleEndian())) { Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; - } else if (VT.is128BitVector()) { + } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) { Opcode = IsPre ? 
AArch64::LDRQpre : AArch64::LDRQpost; + } else if (VT.is64BitVector()) { + if (IsPre || OffsetVal != 8) + return false; + switch (VT.getScalarSizeInBits()) { + case 8: + Opcode = AArch64::LD1Onev8b_POST; + break; + case 16: + Opcode = AArch64::LD1Onev4h_POST; + break; + case 32: + Opcode = AArch64::LD1Onev2s_POST; + break; + case 64: + Opcode = AArch64::LD1Onev1d_POST; + break; + default: + llvm_unreachable("Expected vector element to be a power of 2"); + } + } else if (VT.is128BitVector()) { + if (IsPre || OffsetVal != 16) + return false; + switch (VT.getScalarSizeInBits()) { + case 8: + Opcode = AArch64::LD1Onev16b_POST; + break; + case 16: + Opcode = AArch64::LD1Onev8h_POST; + break; + case 32: + Opcode = AArch64::LD1Onev4s_POST; + break; + case 64: + Opcode = AArch64::LD1Onev2d_POST; + break; + default: + llvm_unreachable("Expected vector element to be a power of 2"); + } } else return false; SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); - ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset()); - int OffsetVal = (int)OffsetOp->getZExtValue(); SDLoc dl(N); - SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64); + // LD1 encodes an immediate offset by using XZR as the offset register. + SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian()) + ? 
CurDAG->getRegister(AArch64::XZR, MVT::i64) + : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64); SDValue Ops[] = { Base, Offset, Chain }; SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT, MVT::Other, Ops); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index c86aed7b38c8c..e4bb5a53c210d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2108,12 +2108,18 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) { setOperationAction(ISD::STRICT_FSETCC, VT, Expand); setOperationAction(ISD::STRICT_FSETCCS, VT, Expand); + // When little-endian we can use ordinary d and q register loads/stores for + // vector types, but when big-endian we need to use structure load/store which + // only allow post-index addressing. if (Subtarget->isLittleEndian()) { for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { setIndexedLoadAction(im, VT, Legal); setIndexedStoreAction(im, VT, Legal); } + } else { + setIndexedLoadAction(ISD::POST_INC, VT, Legal); + setIndexedStoreAction(ISD::POST_INC, VT, Legal); } if (Subtarget->hasD128()) { @@ -27067,6 +27073,12 @@ bool AArch64TargetLowering::getIndexedAddressParts(SDNode *N, SDNode *Op, RHSC = -(uint64_t)RHSC; if (!isInt<9>(RHSC)) return false; + // When big-endian VLD1/VST1 are used for vector load and store, and these + // only allow an offset that's equal to the store size. + EVT MemType = cast<MemSDNode>(N)->getMemoryVT(); + if (!Subtarget->isLittleEndian() && MemType.isVector() && + RHSC != MemType.getStoreSize()) + return false; // Always emit pre-inc/post-inc addressing mode. Use negated constant offset // when dealing with subtraction. 
Offset = DAG.getConstant(RHSC, SDLoc(N), RHS->getValueType(0)); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index f90f12b5ac3c7..400ffff5d567f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4942,39 +4942,42 @@ def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off), (STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; - -def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; -def : 
Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +let Predicates = [IsLE] in { + // We must use ST1 to store vectors in big-endian. + def : Pat<(post_store(v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + + def : Pat<(post_store(v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + 
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +} //===----------------------------------------------------------------------===// // Load/store exclusive instructions. @@ -8925,6 +8928,21 @@ def : St1Pat; def : St1Pat; def : St1Pat; +class St1PostPat + : Pat<(post_store ty:$Vt, GPR64sp:$Rn, (i64 off)), + (INST ty:$Vt, GPR64sp:$Rn, XZR)>; + +let Predicates = [IsBE] in { + def : St1PostPat; + def : St1PostPat; + def : St1PostPat; + def : St1PostPat; + def : St1PostPat; + def : St1PostPat; + def : St1PostPat; + def : St1PostPat; +} + //--- // Single-element //--- diff --git a/llvm/test/CodeGen/AArch64/vector-ldst-offset.ll b/llvm/test/CodeGen/AArch64/vector-ldst-offset.ll new file mode 100644 index 0000000000000..b31ba46893bd3 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/vector-ldst-offset.ll @@ -0,0 +1,2108 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64 < %s -o - | FileCheck %s --check-prefixes=CHECK-LE +; RUN: llc -mtriple=aarch64_be < %s -o - | FileCheck %s --check-prefixes=CHECK-BE + +; Check that we use the correct offset mode for vector loads and stores, and in +; particular for big-endian we use ld1/st1 which only allows postindex immediate +; offset of the same size as the memory access size. +; FIXME: Currently we fail to make use of postindex register offset ld1/st1. 
+ +define [2 x ptr] @v8i8_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8i8_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #8 +; CHECK-LE-NEXT: str d0, [x1], #8 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i8_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.8b }, [x0], #8 +; CHECK-BE-NEXT: st1 { v0.8b }, [x1], #8 +; CHECK-BE-NEXT: ret +entry: + %val = load <8 x i8>, ptr %ldptr, align 2 + store <8 x i8> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i8_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8i8_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #8]! +; CHECK-LE-NEXT: str d0, [x1, #8]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i8_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #8 +; CHECK-BE-NEXT: add x1, x1, #8 +; CHECK-BE-NEXT: ld1 { v0.8b }, [x0] +; CHECK-BE-NEXT: st1 { v0.8b }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %val = load <8 x i8>, ptr %add.ldptr, align 2 + store <8 x i8> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i8_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8i8_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #16 +; CHECK-LE-NEXT: str d0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i8_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.8b }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: st1 { v0.8b }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <8 x i8>, ptr %ldptr, align 2 + store <8 x i8> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i8_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8i8_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #16]! +; CHECK-LE-NEXT: str d0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i8_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.8b }, [x0] +; CHECK-BE-NEXT: st1 { v0.8b }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <8 x i8>, ptr %add.ldptr, align 2 + store <8 x i8> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i8_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v8i8_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i8_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.8b }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.8b }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <8 x i8>, ptr %ldptr, align 2 + store <8 x i8> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i8_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v8i8_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr d0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i8_preidx_reg: +; CHECK-BE: // %bb.0: // 
%entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.8b }, [x0] +; CHECK-BE-NEXT: st1 { v0.8b }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <8 x i8>, ptr %add.ldptr, align 2 + store <8 x i8> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i16_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4i16_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #8 +; CHECK-LE-NEXT: str d0, [x1], #8 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i16_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0], #8 +; CHECK-BE-NEXT: st1 { v0.4h }, [x1], #8 +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x i16>, ptr %ldptr, align 2 + store <4 x i16> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i16_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4i16_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #8]! +; CHECK-LE-NEXT: str d0, [x1, #8]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i16_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #8 +; CHECK-BE-NEXT: add x1, x1, #8 +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: st1 { v0.4h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %val = load <4 x i16>, ptr %add.ldptr, align 2 + store <4 x i16> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i16_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4i16_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #16 +; CHECK-LE-NEXT: str d0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i16_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: st1 { v0.4h }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x i16>, ptr %ldptr, align 2 + store <4 x i16> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i16_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4i16_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #16]! +; CHECK-LE-NEXT: str d0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i16_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: st1 { v0.4h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <4 x i16>, ptr %add.ldptr, align 2 + store <4 x i16> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i16_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v4i16_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i16_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.4h }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x i16>, ptr %ldptr, align 2 + store <4 x i16> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i16_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v4i16_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr d0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i16_preidx_reg: +; CHECK-BE: // 
%bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: st1 { v0.4h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <4 x i16>, ptr %add.ldptr, align 2 + store <4 x i16> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i32_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2i32_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #8 +; CHECK-LE-NEXT: str d0, [x1], #8 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i32_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0], #8 +; CHECK-BE-NEXT: st1 { v0.2s }, [x1], #8 +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x i32>, ptr %ldptr, align 2 + store <2 x i32> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i32_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2i32_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #8]! +; CHECK-LE-NEXT: str d0, [x1, #8]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i32_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #8 +; CHECK-BE-NEXT: add x1, x1, #8 +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] +; CHECK-BE-NEXT: st1 { v0.2s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %val = load <2 x i32>, ptr %add.ldptr, align 2 + store <2 x i32> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i32_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2i32_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #16 +; CHECK-LE-NEXT: str d0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i32_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: st1 { v0.2s }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x i32>, ptr %ldptr, align 2 + store <2 x i32> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i32_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2i32_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #16]! +; CHECK-LE-NEXT: str d0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i32_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] +; CHECK-BE-NEXT: st1 { v0.2s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <2 x i32>, ptr %add.ldptr, align 2 + store <2 x i32> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i32_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v2i32_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i32_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.2s }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x i32>, ptr %ldptr, align 2 + store <2 x i32> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i32_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v2i32_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr d0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i32_preidx_reg: +; CHECK-BE: // 
%bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] +; CHECK-BE-NEXT: st1 { v0.2s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <2 x i32>, ptr %add.ldptr, align 2 + store <2 x i32> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1i64_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v1i64_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #8 +; CHECK-LE-NEXT: str d0, [x1], #8 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1i64_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.1d }, [x0], #8 +; CHECK-BE-NEXT: st1 { v0.1d }, [x1], #8 +; CHECK-BE-NEXT: ret +entry: + %val = load <1 x i64>, ptr %ldptr, align 2 + store <1 x i64> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1i64_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v1i64_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #8]! +; CHECK-LE-NEXT: str d0, [x1, #8]! +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1i64_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ldr d0, [x0, #8]! 
+; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x1, x1, #8 +; CHECK-BE-NEXT: str d0, [x8, #8] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %val = load <1 x i64>, ptr %add.ldptr, align 2 + store <1 x i64> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1i64_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v1i64_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #16 +; CHECK-LE-NEXT: str d0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1i64_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ldr d0, [x0], #16 +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: str d0, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <1 x i64>, ptr %ldptr, align 2 + store <1 x i64> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1i64_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v1i64_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #16]! +; CHECK-LE-NEXT: str d0, [x1, #16]! +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1i64_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ldr d0, [x0, #16]! 
+; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: str d0, [x8, #16] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <1 x i64>, ptr %add.ldptr, align 2 + store <1 x i64> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1i64_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v1i64_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1i64_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ldr d0, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: str d0, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <1 x i64>, ptr %ldptr, align 2 + store <1 x i64> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1i64_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v1i64_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr d0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1i64_preidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: ldr d0, [x0, x2] +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: 
str d0, [x8, x2] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <1 x i64>, ptr %add.ldptr, align 2 + store <1 x i64> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f16_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4f16_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #8 +; CHECK-LE-NEXT: str d0, [x1], #8 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f16_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0], #8 +; CHECK-BE-NEXT: st1 { v0.4h }, [x1], #8 +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x half>, ptr %ldptr, align 2 + store <4 x half> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f16_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4f16_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #8]! +; CHECK-LE-NEXT: str d0, [x1, #8]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f16_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #8 +; CHECK-BE-NEXT: add x1, x1, #8 +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: st1 { v0.4h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %val = load <4 x half>, ptr %add.ldptr, align 2 + store <4 x half> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f16_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4f16_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #16 +; CHECK-LE-NEXT: str d0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f16_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: st1 { v0.4h }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x half>, ptr %ldptr, align 2 + store <4 x half> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f16_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4f16_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #16]! +; CHECK-LE-NEXT: str d0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f16_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: st1 { v0.4h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <4 x half>, ptr %add.ldptr, align 2 + store <4 x half> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f16_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v4f16_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f16_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.4h }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x half>, ptr %ldptr, align 2 + store <4 x half> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f16_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v4f16_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr d0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f16_preidx_reg: +; CHECK-BE: 
// %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: st1 { v0.4h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <4 x half>, ptr %add.ldptr, align 2 + store <4 x half> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f32_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2f32_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #8 +; CHECK-LE-NEXT: str d0, [x1], #8 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f32_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0], #8 +; CHECK-BE-NEXT: st1 { v0.2s }, [x1], #8 +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x float>, ptr %ldptr, align 2 + store <2 x float> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f32_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2f32_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #8]! +; CHECK-LE-NEXT: str d0, [x1, #8]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f32_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #8 +; CHECK-BE-NEXT: add x1, x1, #8 +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] +; CHECK-BE-NEXT: st1 { v0.2s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %val = load <2 x float>, ptr %add.ldptr, align 2 + store <2 x float> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f32_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2f32_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #16 +; CHECK-LE-NEXT: str d0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f32_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: st1 { v0.2s }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x float>, ptr %ldptr, align 2 + store <2 x float> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f32_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2f32_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #16]! +; CHECK-LE-NEXT: str d0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f32_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] +; CHECK-BE-NEXT: st1 { v0.2s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <2 x float>, ptr %add.ldptr, align 2 + store <2 x float> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f32_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v2f32_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f32_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.2s }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x float>, ptr %ldptr, align 2 + store <2 x float> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f32_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v2f32_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr d0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f32_preidx_reg: +; 
CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] +; CHECK-BE-NEXT: st1 { v0.2s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <2 x float>, ptr %add.ldptr, align 2 + store <2 x float> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1f64_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v1f64_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #8 +; CHECK-LE-NEXT: str d0, [x1], #8 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1f64_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.1d }, [x0], #8 +; CHECK-BE-NEXT: st1 { v0.1d }, [x1], #8 +; CHECK-BE-NEXT: ret +entry: + %val = load <1 x double>, ptr %ldptr, align 2 + store <1 x double> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1f64_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v1f64_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #8]! +; CHECK-LE-NEXT: str d0, [x1, #8]! +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1f64_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ldr d0, [x0, #8]! 
+; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x1, x1, #8 +; CHECK-BE-NEXT: str d0, [x8, #8] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %val = load <1 x double>, ptr %add.ldptr, align 2 + store <1 x double> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1f64_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v1f64_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #16 +; CHECK-LE-NEXT: str d0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1f64_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ldr d0, [x0], #16 +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: str d0, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <1 x double>, ptr %ldptr, align 2 + store <1 x double> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1f64_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v1f64_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #16]! +; CHECK-LE-NEXT: str d0, [x1, #16]! +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1f64_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ldr d0, [x0, #16]! 
+; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: str d0, [x8, #16] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <1 x double>, ptr %add.ldptr, align 2 + store <1 x double> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1f64_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v1f64_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1f64_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ldr d0, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: str d0, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <1 x double>, ptr %ldptr, align 2 + store <1 x double> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1f64_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v1f64_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr d0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1f64_preidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: ldr d0, [x0, x2] +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; 
CHECK-BE-NEXT: str d0, [x8, x2] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <1 x double>, ptr %add.ldptr, align 2 + store <1 x double> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v16i8_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v16i8_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #16 +; CHECK-LE-NEXT: str q0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v16i8_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.16b }, [x0], #16 +; CHECK-BE-NEXT: st1 { v0.16b }, [x1], #16 +; CHECK-BE-NEXT: ret +entry: + %val = load <16 x i8>, ptr %ldptr, align 2 + store <16 x i8> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v16i8_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v16i8_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #16]! +; CHECK-LE-NEXT: str q0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v16i8_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.16b }, [x0] +; CHECK-BE-NEXT: st1 { v0.16b }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <16 x i8>, ptr %add.ldptr, align 2 + store <16 x i8> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v16i8_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v16i8_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #32 +; CHECK-LE-NEXT: str q0, [x1], #32 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v16i8_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.16b }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: st1 { v0.16b }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <16 x i8>, ptr %ldptr, align 2 + store <16 x i8> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v16i8_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v16i8_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #32]! +; CHECK-LE-NEXT: str q0, [x1, #32]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v16i8_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: ld1 { v0.16b }, [x0] +; CHECK-BE-NEXT: st1 { v0.16b }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %val = load <16 x i8>, ptr %add.ldptr, align 2 + store <16 x i8> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v16i8_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v16i8_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v16i8_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.16b }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.16b }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <16 x i8>, ptr %ldptr, align 2 + store <16 x i8> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v16i8_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v16i8_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr q0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v16i8_preidx_reg: +; CHECK-BE: 
// %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.16b }, [x0] +; CHECK-BE-NEXT: st1 { v0.16b }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <16 x i8>, ptr %add.ldptr, align 2 + store <16 x i8> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i16_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8i16_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #16 +; CHECK-LE-NEXT: str q0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i16_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0], #16 +; CHECK-BE-NEXT: st1 { v0.8h }, [x1], #16 +; CHECK-BE-NEXT: ret +entry: + %val = load <8 x i16>, ptr %ldptr, align 2 + store <8 x i16> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i16_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8i16_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #16]! +; CHECK-LE-NEXT: str q0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i16_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0] +; CHECK-BE-NEXT: st1 { v0.8h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <8 x i16>, ptr %add.ldptr, align 2 + store <8 x i16> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i16_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8i16_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #32 +; CHECK-LE-NEXT: str q0, [x1], #32 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i16_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: st1 { v0.8h }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <8 x i16>, ptr %ldptr, align 2 + store <8 x i16> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i16_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8i16_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #32]! +; CHECK-LE-NEXT: str q0, [x1, #32]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i16_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0] +; CHECK-BE-NEXT: st1 { v0.8h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %val = load <8 x i16>, ptr %add.ldptr, align 2 + store <8 x i16> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i16_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v8i16_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i16_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.8h }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <8 x i16>, ptr %ldptr, align 2 + store <8 x i16> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i16_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v8i16_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr q0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i16_preidx_reg: +; CHECK-BE: // 
%bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0] +; CHECK-BE-NEXT: st1 { v0.8h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <8 x i16>, ptr %add.ldptr, align 2 + store <8 x i16> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i32_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4i32_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #16 +; CHECK-LE-NEXT: str q0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i32_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0], #16 +; CHECK-BE-NEXT: st1 { v0.4s }, [x1], #16 +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x i32>, ptr %ldptr, align 2 + store <4 x i32> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i32_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4i32_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #16]! +; CHECK-LE-NEXT: str q0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i32_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] +; CHECK-BE-NEXT: st1 { v0.4s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <4 x i32>, ptr %add.ldptr, align 2 + store <4 x i32> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i32_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4i32_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #32 +; CHECK-LE-NEXT: str q0, [x1], #32 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i32_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: st1 { v0.4s }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x i32>, ptr %ldptr, align 2 + store <4 x i32> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i32_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4i32_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #32]! +; CHECK-LE-NEXT: str q0, [x1, #32]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i32_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] +; CHECK-BE-NEXT: st1 { v0.4s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %val = load <4 x i32>, ptr %add.ldptr, align 2 + store <4 x i32> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i32_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v4i32_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i32_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.4s }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x i32>, ptr %ldptr, align 2 + store <4 x i32> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i32_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v4i32_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr q0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i32_preidx_reg: +; CHECK-BE: // 
%bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] +; CHECK-BE-NEXT: st1 { v0.4s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <4 x i32>, ptr %add.ldptr, align 2 + store <4 x i32> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i64_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2i64_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #16 +; CHECK-LE-NEXT: str q0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i64_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0], #16 +; CHECK-BE-NEXT: st1 { v0.2d }, [x1], #16 +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x i64>, ptr %ldptr, align 2 + store <2 x i64> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i64_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2i64_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #16]! +; CHECK-LE-NEXT: str q0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i64_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: st1 { v0.2d }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <2 x i64>, ptr %add.ldptr, align 2 + store <2 x i64> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i64_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2i64_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #32 +; CHECK-LE-NEXT: str q0, [x1], #32 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i64_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: st1 { v0.2d }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x i64>, ptr %ldptr, align 2 + store <2 x i64> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i64_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2i64_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #32]! +; CHECK-LE-NEXT: str q0, [x1, #32]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i64_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: st1 { v0.2d }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %val = load <2 x i64>, ptr %add.ldptr, align 2 + store <2 x i64> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i64_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v2i64_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i64_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.2d }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x i64>, ptr %ldptr, align 2 + store <2 x i64> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i64_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v2i64_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr q0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i64_preidx_reg: +; CHECK-BE: // 
%bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: st1 { v0.2d }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <2 x i64>, ptr %add.ldptr, align 2 + store <2 x i64> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8f16_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8f16_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #16 +; CHECK-LE-NEXT: str q0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8f16_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0], #16 +; CHECK-BE-NEXT: st1 { v0.8h }, [x1], #16 +; CHECK-BE-NEXT: ret +entry: + %val = load <8 x half>, ptr %ldptr, align 2 + store <8 x half> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8f16_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8f16_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #16]! +; CHECK-LE-NEXT: str q0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8f16_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0] +; CHECK-BE-NEXT: st1 { v0.8h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <8 x half>, ptr %add.ldptr, align 2 + store <8 x half> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8f16_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8f16_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #32 +; CHECK-LE-NEXT: str q0, [x1], #32 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8f16_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: st1 { v0.8h }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <8 x half>, ptr %ldptr, align 2 + store <8 x half> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8f16_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8f16_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #32]! +; CHECK-LE-NEXT: str q0, [x1, #32]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8f16_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0] +; CHECK-BE-NEXT: st1 { v0.8h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %val = load <8 x half>, ptr %add.ldptr, align 2 + store <8 x half> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8f16_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v8f16_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8f16_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.8h }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <8 x half>, ptr %ldptr, align 2 + store <8 x half> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8f16_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v8f16_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr q0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8f16_preidx_reg: +; CHECK-BE: 
// %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0] +; CHECK-BE-NEXT: st1 { v0.8h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <8 x half>, ptr %add.ldptr, align 2 + store <8 x half> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f32_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4f32_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #16 +; CHECK-LE-NEXT: str q0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f32_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0], #16 +; CHECK-BE-NEXT: st1 { v0.4s }, [x1], #16 +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x float>, ptr %ldptr, align 2 + store <4 x float> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f32_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4f32_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #16]! +; CHECK-LE-NEXT: str q0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f32_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] +; CHECK-BE-NEXT: st1 { v0.4s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <4 x float>, ptr %add.ldptr, align 2 + store <4 x float> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f32_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4f32_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #32 +; CHECK-LE-NEXT: str q0, [x1], #32 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f32_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: st1 { v0.4s }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x float>, ptr %ldptr, align 2 + store <4 x float> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f32_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4f32_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #32]! +; CHECK-LE-NEXT: str q0, [x1, #32]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f32_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] +; CHECK-BE-NEXT: st1 { v0.4s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %val = load <4 x float>, ptr %add.ldptr, align 2 + store <4 x float> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f32_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v4f32_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f32_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.4s }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x float>, ptr %ldptr, align 2 + store <4 x float> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f32_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v4f32_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr q0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f32_preidx_reg: +; 
CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] +; CHECK-BE-NEXT: st1 { v0.4s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <4 x float>, ptr %add.ldptr, align 2 + store <4 x float> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f64_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2f64_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #16 +; CHECK-LE-NEXT: str q0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f64_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0], #16 +; CHECK-BE-NEXT: st1 { v0.2d }, [x1], #16 +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x double>, ptr %ldptr, align 2 + store <2 x double> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f64_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2f64_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #16]! +; CHECK-LE-NEXT: str q0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f64_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: st1 { v0.2d }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <2 x double>, ptr %add.ldptr, align 2 + store <2 x double> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f64_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2f64_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #32 +; CHECK-LE-NEXT: str q0, [x1], #32 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f64_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: st1 { v0.2d }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x double>, ptr %ldptr, align 2 + store <2 x double> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f64_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2f64_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #32]! +; CHECK-LE-NEXT: str q0, [x1, #32]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f64_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: st1 { v0.2d }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %val = load <2 x double>, ptr %add.ldptr, align 2 + store <2 x double> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f64_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v2f64_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f64_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.2d }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x double>, ptr %ldptr, align 2 + store <2 x double> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f64_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v2f64_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr q0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f64_preidx_reg: +; 
CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: st1 { v0.2d }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <2 x double>, ptr %add.ldptr, align 2 + store <2 x double> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll index 68a9dff812329..2a37183c47d51 100644 --- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll @@ -2835,14 +2835,13 @@ define i32 @test_widening_instr_mull(ptr %p1, ptr %p2, i32 %h) { ; CHECK-BE: // %bb.0: // %entry ; CHECK-BE-NEXT: .LBB24_1: // %loop ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-BE-NEXT: ld1 { v0.16b }, [x1] -; CHECK-BE-NEXT: ld1 { v1.8h }, [x0] +; CHECK-BE-NEXT: ld1 { v0.16b }, [x1], #16 ; CHECK-BE-NEXT: add x8, x0, #16 +; CHECK-BE-NEXT: ld1 { v1.8h }, [x0] ; CHECK-BE-NEXT: ld1 { v3.8h }, [x8] ; CHECK-BE-NEXT: add x9, x0, #48 ; CHECK-BE-NEXT: add x10, x0, #32 ; CHECK-BE-NEXT: subs w2, w2, #1 -; CHECK-BE-NEXT: add x1, x1, #16 ; CHECK-BE-NEXT: ushll v2.8h, v0.8b, #0 ; CHECK-BE-NEXT: ushll2 v0.8h, v0.16b, #0 ; CHECK-BE-NEXT: umull v4.4s, v1.4h, v2.4h @@ -3094,7 +3093,7 @@ define i32 @test_widening_instr_mull_2(ptr %p1, ptr %p2, i32 %h) { ; CHECK-BE-NEXT: ld1 { v3.16b }, [x8] ; CHECK-BE-NEXT: .LBB26_1: // %loop ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-BE-NEXT: ld1 { v4.16b }, [x1] +; CHECK-BE-NEXT: ld1 { v4.16b }, [x1], #16 ; CHECK-BE-NEXT: add x8, x0, #32 ; CHECK-BE-NEXT: ld1 { v16.4s }, [x0] ; CHECK-BE-NEXT: add x9, x0, #48 @@ -3107,7 +3106,6 @@ define i32 @test_widening_instr_mull_2(ptr %p1, ptr %p2, i32 
%h) { ; CHECK-BE-NEXT: tbl v6.16b, { v4.16b }, v3.16b ; CHECK-BE-NEXT: tbl v7.16b, { v4.16b }, v2.16b ; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v0.16b -; CHECK-BE-NEXT: add x1, x1, #16 ; CHECK-BE-NEXT: rev32 v5.16b, v5.16b ; CHECK-BE-NEXT: rev32 v6.16b, v6.16b ; CHECK-BE-NEXT: rev32 v7.16b, v7.16b @@ -3175,19 +3173,18 @@ define i32 @mul_zext_16i8_sext_16i8(ptr %p1, ptr %p2, i32 %h) { ; CHECK-BE: // %bb.0: // %entry ; CHECK-BE-NEXT: .LBB27_1: // %loop ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-BE-NEXT: ld1 { v0.16b }, [x0] -; CHECK-BE-NEXT: ld1 { v1.16b }, [x1] +; CHECK-BE-NEXT: ld1 { v0.16b }, [x1], #16 ; CHECK-BE-NEXT: add x8, x0, #48 +; CHECK-BE-NEXT: ld1 { v1.16b }, [x0] ; CHECK-BE-NEXT: subs w2, w2, #1 -; CHECK-BE-NEXT: add x1, x1, #16 -; CHECK-BE-NEXT: sshll2 v2.8h, v0.16b, #0 -; CHECK-BE-NEXT: ushll2 v3.8h, v1.16b, #0 -; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-BE-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-BE-NEXT: sshll2 v2.8h, v1.16b, #0 +; CHECK-BE-NEXT: ushll2 v3.8h, v0.16b, #0 +; CHECK-BE-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-BE-NEXT: smull2 v4.4s, v2.8h, v3.8h ; CHECK-BE-NEXT: smull v2.4s, v2.4h, v3.4h -; CHECK-BE-NEXT: smull v3.4s, v0.4h, v1.4h -; CHECK-BE-NEXT: smull2 v0.4s, v0.8h, v1.8h +; CHECK-BE-NEXT: smull v3.4s, v1.4h, v0.4h +; CHECK-BE-NEXT: smull2 v0.4s, v1.8h, v0.8h ; CHECK-BE-NEXT: st1 { v4.4s }, [x8] ; CHECK-BE-NEXT: add x8, x0, #32 ; CHECK-BE-NEXT: st1 { v3.4s }, [x0] @@ -3249,14 +3246,13 @@ define i32 @mul_zext_16i8_sext_16i16(ptr %p1, ptr %p2, i32 %h) { ; CHECK-BE: // %bb.0: // %entry ; CHECK-BE-NEXT: .LBB28_1: // %loop ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-BE-NEXT: ld1 { v0.16b }, [x1] -; CHECK-BE-NEXT: ld1 { v1.8h }, [x0] +; CHECK-BE-NEXT: ld1 { v0.16b }, [x1], #16 ; CHECK-BE-NEXT: add x8, x0, #16 +; CHECK-BE-NEXT: ld1 { v1.8h }, [x0] ; CHECK-BE-NEXT: ld1 { v3.8h }, [x8] ; CHECK-BE-NEXT: add x9, x0, #48 ; CHECK-BE-NEXT: add x10, x0, #32 ; 
CHECK-BE-NEXT: subs w2, w2, #1 -; CHECK-BE-NEXT: add x1, x1, #16 ; CHECK-BE-NEXT: ushll v2.8h, v0.8b, #0 ; CHECK-BE-NEXT: ushll2 v0.8h, v0.16b, #0 ; CHECK-BE-NEXT: smull v4.4s, v1.4h, v2.4h