diff --git a/llvm/lib/Target/AIE/AIECombine.td b/llvm/lib/Target/AIE/AIECombine.td index 51165a58e9da..d61ce22cffa0 100644 --- a/llvm/lib/Target/AIE/AIECombine.td +++ b/llvm/lib/Target/AIE/AIECombine.td @@ -252,6 +252,18 @@ def combine_trunc_load : GICombineRule< [{ return matchNarrowTruncLoad(*${root}, MRI, Helper, Observer, ${matchinfo}); }]), (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>; +def combine_unaligned_extract_load : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_EXTRACT_VECTOR_ELT, G_AIE_ZEXT_EXTRACT_VECTOR_ELT, G_AIE_SEXT_EXTRACT_VECTOR_ELT): $root, + [{ return matchUnalignedExtractLoad(*${root}, MRI, Observer, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>; + +def combine_unaligned_vector_load : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_LOAD): $root, + [{ return matchUnalignedVectorLoad(*${root}, MRI, Observer, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; + // AIE-specifc combines (currently shared by AIE2 and AIE2P). def aie_additional_combines : GICombineGroup<[ combine_unpad_vector, @@ -274,7 +286,9 @@ def aie_additional_combines : GICombineGroup<[ combine_align_memset, combine_peel_memset, combine_pack_stores_into_memset, - combine_trunc_load + combine_trunc_load, + combine_unaligned_extract_load, + combine_unaligned_vector_load ]>; // AIE2P-specific combines. 
@@ -408,4 +422,3 @@ def AIE2PPostLegalizerCustomCombiner combine_add_vector_elt_undef, ]> { } - diff --git a/llvm/lib/Target/AIE/AIECombinerHelper.cpp b/llvm/lib/Target/AIE/AIECombinerHelper.cpp index 4a76a8198c3b..4cf2138cc54b 100644 --- a/llvm/lib/Target/AIE/AIECombinerHelper.cpp +++ b/llvm/lib/Target/AIE/AIECombinerHelper.cpp @@ -4292,3 +4292,324 @@ bool llvm::matchExtractVecEltAssertBcst(MachineInstr &MI, return true; } + +/// Helper function to recursively check if all uses of a register are valid +/// for the unaligned extract load combiner. +/// Automatically traverses through bitcasts to validate all usage patterns. +/// Valid terminal uses are: direct extracts or pad vector operations (with use +/// check). +static bool areLoadUsesValidForExtractCombine(Register Reg, + unsigned ZExtExtractOpcode, + unsigned SExtExtractOpcode, + unsigned PadVectorOpcode, + MachineRegisterInfo &MRI) { + + auto IsValidExtractOpcode = [&](unsigned Opcode) { + return Opcode == TargetOpcode::G_EXTRACT_VECTOR_ELT || + Opcode == ZExtExtractOpcode || Opcode == SExtExtractOpcode; + }; + + for (const MachineInstr &Use : MRI.use_nodbg_instructions(Reg)) { + const unsigned UseOpcode = Use.getOpcode(); + + if (UseOpcode == TargetOpcode::G_BITCAST) { + // Recursively check bitcast uses + const Register BitcastDst = Use.getOperand(0).getReg(); + if (!areLoadUsesValidForExtractCombine(BitcastDst, ZExtExtractOpcode, + SExtExtractOpcode, PadVectorOpcode, + MRI)) + return false; + continue; + } + + if (IsValidExtractOpcode(UseOpcode)) { + // Direct extract is valid (plain, zext, or sext) + continue; + } + + if (UseOpcode == PadVectorOpcode) { + // Pad is valid if only used by extracts + const Register PadDst = Use.getOperand(0).getReg(); + for (const MachineInstr &PadUse : MRI.use_nodbg_instructions(PadDst)) { + if (!IsValidExtractOpcode(PadUse.getOpcode())) + return false; + } + continue; + } + + // Invalid use + return false; + } + + return true; +} + +/// Match unaligned vector loads that 
are only used for extracting elements +/// and convert them to direct scalar loads. +/// Supports s8, s16 and s32 element extractions from various vector +/// configurations. Pattern: +/// %vec:_(<N x sM>) = G_LOAD %ptr(p0) :: (align < M/8) +/// %bitcast:_(<K x sX>) = G_BITCAST %vec +/// %idx:_(s32) = G_CONSTANT i32 N +/// %elt:_(sX) = G_EXTRACT_VECTOR_ELT %bitcast, %idx +/// Or with G_AIE_PAD_VECTOR_UNDEF: +/// %vec = G_LOAD %ptr :: (unaligned) +/// %bitcast = G_BITCAST %vec +/// %padded = G_AIE_PAD_VECTOR_UNDEF %bitcast +/// %result:_(s32) = G_AIE_[Z/S]EXT_EXTRACT_VECTOR_ELT %padded, %idx +/// Converts to: +/// %offset:_(s20) = G_CONSTANT i20 (N * sizeof(sX)) +/// %new_ptr:_(p0) = G_PTR_ADD %ptr, %offset +/// %elt:_(sX) = G_LOAD %new_ptr :: (align 1) +/// %result:_(s32) = G_[Z/S]EXT %elt +bool llvm::matchUnalignedExtractLoad(MachineInstr &ExtractMI, + MachineRegisterInfo &MRI, + GISelChangeObserver &Observer, + BuildFnTy &MatchInfo) { + const MachineFunction &MF = *ExtractMI.getMF(); + const AIEBaseInstrInfo &TII = + *static_cast<const AIEBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); + + const unsigned Opcode = ExtractMI.getOpcode(); + const unsigned ZExtExtractOpcode = + TII.getGenericExtractVectorEltOpcode(false); + const unsigned SExtExtractOpcode = TII.getGenericExtractVectorEltOpcode(true); + const unsigned PadVectorOpcode = TII.getGenericPadVectorOpcode(); + + const bool IsZExtExtract = (Opcode == ZExtExtractOpcode); + const bool IsSExtExtract = (Opcode == SExtExtractOpcode); + const bool IsPlainExtract = (Opcode == TargetOpcode::G_EXTRACT_VECTOR_ELT); + + if (!IsZExtExtract && !IsSExtExtract && !IsPlainExtract) + return false; + + // Get the index operand + const Register IdxReg = ExtractMI.getOperand(2).getReg(); + const auto IdxCst = getIConstantVRegValWithLookThrough(IdxReg, MRI); + if (!IdxCst) + return false; + const int64_t Index = IdxCst->Value.getSExtValue(); + + // Get the vector operand + const Register VecReg = ExtractMI.getOperand(1).getReg(); + const LLT VecTy =
MRI.getType(VecReg); + + // Check if vector has extractable element types (s8, s16, or s32) + if (!VecTy.isVector()) + return false; + + const LLT ElemTy = VecTy.getElementType(); + const unsigned ElemSize = ElemTy.getSizeInBits(); + if (ElemSize != 8 && ElemSize != 16 && ElemSize != 32) + return false; + + // Trace back through G_AIE_PAD_VECTOR_UNDEF if present + MachineInstr *VecDefMI = MRI.getVRegDef(VecReg); + Register SourceVecReg = VecReg; + + if (VecDefMI->getOpcode() == PadVectorOpcode) { + SourceVecReg = VecDefMI->getOperand(1).getReg(); + VecDefMI = MRI.getVRegDef(SourceVecReg); + } + + // Check for G_BITCAST (or direct vector if no bitcast needed) + Register LoadVecReg = SourceVecReg; + if (VecDefMI->getOpcode() == TargetOpcode::G_BITCAST) + LoadVecReg = VecDefMI->getOperand(1).getReg(); + + MachineInstr *LoadMI = MRI.getVRegDef(LoadVecReg); + + // Check if it's a load + if (LoadMI->getOpcode() != TargetOpcode::G_LOAD) + return false; + + // Check if the load is unaligned relative to the vector's total size + if (LoadMI->memoperands_empty()) + return false; + + const MachineMemOperand *MMO = LoadMI->memoperands().front(); + const LLT LoadVecTy = MRI.getType(LoadVecReg); + const unsigned LoadVecSizeInBytes = LoadVecTy.getSizeInBytes(); + // Vector is unaligned if alignment < vector size + // This allows extracting elements when the vector load itself is unaligned + if (MMO->getAlign().value() >= LoadVecSizeInBytes) + return false; + + // Check that the loaded vector is only used by extracts (through bitcast and + // pad). The helper function will automatically traverse through bitcasts. 
+ const Register LoadDstReg = LoadMI->getOperand(0).getReg(); + + if (!areLoadUsesValidForExtractCombine(LoadDstReg, ZExtExtractOpcode, + SExtExtractOpcode, PadVectorOpcode, + MRI)) + return false; + + // All checks passed, we can combine + MatchInfo = [=, &ExtractMI, &MRI, &Observer](MachineIRBuilder &B) { + const Register PtrReg = LoadMI->getOperand(1).getReg(); + const LLT S20 = LLT::scalar(20); + + const unsigned ElemSizeInBytes = ElemSize / 8; + const int64_t ByteOffset = Index * ElemSizeInBytes; + + // Set insertion point right after the original vector load + B.setInsertPt(*LoadMI->getParent(), std::next(LoadMI->getIterator())); + B.setDebugLoc(LoadMI->getDebugLoc()); + + // Create offset constant and pointer add + const Register OffsetReg = B.buildConstant(S20, ByteOffset).getReg(0); + const Register NewPtrReg = + B.buildPtrAdd(MRI.getType(PtrReg), PtrReg, OffsetReg).getReg(0); + + // Calculate alignment for scalar load based on original vector load + // alignment using GCD to find the maximum provable alignment + const unsigned OrigAlign = MMO->getAlign().value(); + const unsigned ScalarAlign = std::gcd(OrigAlign, OrigAlign + ByteOffset); + + // Create new scalar load with derived alignment + MachineFunction &MF = B.getMF(); + MachineMemOperand *NewMMO = + MF.getMachineMemOperand(MMO->getPointerInfo(), MMO->getFlags(), + ElemSizeInBytes, Align(ScalarAlign)); + + const Register LoadResultReg = MRI.createGenericVirtualRegister(ElemTy); + Observer.createdInstr(*B.buildLoad(LoadResultReg, NewPtrReg, *NewMMO)); + + // Now set insertion point at the extract position for the copy/extension + B.setInstr(ExtractMI); + + // Handle the result based on the original opcode + const Register DstReg = ExtractMI.getOperand(0).getReg(); + if (IsZExtExtract) { + // Need to zero-extend to s32 + Observer.createdInstr(*B.buildZExt(DstReg, LoadResultReg)); + } else if (IsSExtExtract) { + // Need to sign-extend to s32 + Observer.createdInstr(*B.buildSExt(DstReg, LoadResultReg)); 
+ } else { + // G_EXTRACT_VECTOR_ELT + // Just copy the result + Observer.createdInstr(*B.buildCopy(DstReg, LoadResultReg)); + } + + Observer.erasingInstr(ExtractMI); + ExtractMI.eraseFromParent(); + }; + + return true; +} + +/// Match unaligned vector loads and transform them to use a better-aligned +/// element type based on the actual alignment. +/// Pattern: +/// %vec:_(<32 x s16>) = G_LOAD %ptr(p0) :: (align 4) +/// Converts to: +/// %vec_new:_(<16 x s32>) = G_LOAD %ptr(p0) :: (align 4) +/// %vec:_(<32 x s16>) = G_BITCAST %vec_new(<16 x s32>) +bool llvm::matchUnalignedVectorLoad(MachineInstr &LoadMI, + MachineRegisterInfo &MRI, + GISelChangeObserver &Observer, + BuildFnTy &MatchInfo) { + assert(LoadMI.getOpcode() == TargetOpcode::G_LOAD && "Expected G_LOAD"); + + // Get load information + const Register DstReg = LoadMI.getOperand(0).getReg(); + const LLT DstTy = MRI.getType(DstReg); + + // Only process vector loads + if (!DstTy.isVector()) + return false; + + // Check memory operand for alignment + if (LoadMI.memoperands_empty()) + return false; + + const MachineMemOperand *MMO = LoadMI.memoperands().front(); + const unsigned Alignment = MMO->getAlign().value(); + + // Skip if the vector is already well-aligned (alignment >= vector size) + const unsigned VecSizeInBytes = DstTy.getSizeInBytes(); + if (Alignment >= VecSizeInBytes) + return false; + + // Get element type information + const LLT ElemTy = DstTy.getElementType(); + const unsigned ElemSizeInBits = ElemTy.getSizeInBits(); + + // Skip if the load is only used for extracts - let matchUnalignedExtractLoad + // handle it. 
This prevents the two combiners from competing for the same + // opportunities + const MachineFunction &MF = *LoadMI.getMF(); + const AIEBaseInstrInfo &TII = + *static_cast<const AIEBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); + const unsigned ZExtExtractOpcode = + TII.getGenericExtractVectorEltOpcode(false); + const unsigned SExtExtractOpcode = TII.getGenericExtractVectorEltOpcode(true); + const unsigned PadVectorOpcode = TII.getGenericPadVectorOpcode(); + + if (areLoadUsesValidForExtractCombine( + DstReg, ZExtExtractOpcode, SExtExtractOpcode, PadVectorOpcode, MRI)) + return false; + + // Skip if the load has a single user that is a G_STORE with the same + // alignment. This case can be perfectly scalarized during legalization + if (MRI.hasOneNonDBGUse(DstReg)) { + const MachineInstr *UserMI = &*MRI.use_instr_nodbg_begin(DstReg); + if (UserMI->getOpcode() == TargetOpcode::G_STORE) { + const GStore *StoreMI = cast<GStore>(UserMI); + if (!StoreMI->memoperands_empty()) { + const MachineMemOperand *StoreMMO = StoreMI->memoperands().front(); + // If store has the same alignment as the load, skip + if (StoreMMO->getAlign().value() == Alignment) + return false; + } + } + } + + // We already have the best element size option.
+ if (Alignment == ElemSizeInBits / 8) + return false; + + // Only handle s8 and s16 element types that can be promoted to s32 + if (ElemSizeInBits != 8 && ElemSizeInBits != 16) + return false; + + // Determine the optimal element type based on alignment + unsigned NewElemSizeInBits = 0; + if (Alignment >= 4) { + NewElemSizeInBits = 32; + } else if (Alignment >= 2) { + NewElemSizeInBits = 16; + } else { + // Alignment doesn't allow for a better element type + return false; + } + + // Check if the vector size is compatible with the new element size + const unsigned VecSizeInBits = DstTy.getSizeInBits(); + if (VecSizeInBits % NewElemSizeInBits != 0) + return false; + + MatchInfo = [=, PtrReg = LoadMI.getOperand(1).getReg(), &MRI, + &Observer](MachineIRBuilder &B) { + MachineFunction &MF = B.getMF(); + + // Calculate new number of elements + const unsigned NewNumElems = VecSizeInBits / NewElemSizeInBits; + + // Create the new vector type with better-aligned elements + const LLT NewVecTy = LLT::fixed_vector(NewNumElems, NewElemSizeInBits); + const Register NewLoadReg = MRI.createGenericVirtualRegister(NewVecTy); + + // Create a new MMO with the same properties but updated type + MachineMemOperand *NewMMO = MF.getMachineMemOperand( + MMO->getPointerInfo(), MMO->getFlags(), NewVecTy, MMO->getAlign()); + + Observer.createdInstr(*B.buildLoad(NewLoadReg, PtrReg, *NewMMO)); + + // Bitcast back to the original type + Observer.createdInstr(*B.buildBitcast(DstReg, NewLoadReg)); + }; + + return true; +} diff --git a/llvm/lib/Target/AIE/AIECombinerHelper.h b/llvm/lib/Target/AIE/AIECombinerHelper.h index 266bf0e8c4f1..7a5d6aec275e 100644 --- a/llvm/lib/Target/AIE/AIECombinerHelper.h +++ b/llvm/lib/Target/AIE/AIECombinerHelper.h @@ -308,6 +308,15 @@ bool matchExtractVecEltAssertBcst(MachineInstr &MI, MachineRegisterInfo &MRI, const AIEBaseInstrInfo &TII, GISelChangeObserver &Observer, BuildFnTy &MatchInfo); + +bool matchUnalignedExtractLoad(MachineInstr &ExtractMI, + 
MachineRegisterInfo &MRI, + GISelChangeObserver &Observer, + BuildFnTy &MatchInfo); + +bool matchUnalignedVectorLoad(MachineInstr &LoadMI, MachineRegisterInfo &MRI, + GISelChangeObserver &Observer, + BuildFnTy &MatchInfo); } // namespace llvm #endif diff --git a/llvm/test/CodeGen/AIE/GlobalISel/combine-unaligned-vector-load.mir b/llvm/test/CodeGen/AIE/GlobalISel/combine-unaligned-vector-load.mir new file mode 100644 index 000000000000..459f425855a9 --- /dev/null +++ b/llvm/test/CodeGen/AIE/GlobalISel/combine-unaligned-vector-load.mir @@ -0,0 +1,627 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates +# RUN: llc -mtriple=aie2 -O0 -run-pass=aie2-prelegalizer-combiner -global-isel %s -o - | FileCheck %s --check-prefix=AIE2 +# RUN: llc -mtriple=aie2p -O0 -run-pass=aie2p-prelegalizer-combiner -global-isel %s -o - | FileCheck %s --check-prefix=AIE2P + +--- +name: test_s16_to_s32_align4 +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s16_to_s32_align4 + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<16 x s32>), align 4) + ; AIE2-NEXT: [[BITCAST:%[0-9]+]]:_(<32 x s16>) = G_BITCAST [[LOAD]](<16 x s32>) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<32 x s16>) + ; + ; AIE2P-LABEL: name: test_s16_to_s32_align4 + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<16 x s32>), align 4) + ; AIE2P-NEXT: [[BITCAST:%[0-9]+]]:_(<32 x s16>) = 
G_BITCAST [[LOAD]](<16 x s32>) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<32 x s16>) + %0:_(p0) = COPY $p0 + %1:_(<32 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<32 x s16>), align 4) + PseudoRET implicit $lr, implicit %1 +... +--- +name: test_s8_to_s32_align4 +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s8_to_s32_align4 + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<16 x s32>), align 4) + ; AIE2-NEXT: [[BITCAST:%[0-9]+]]:_(<64 x s8>) = G_BITCAST [[LOAD]](<16 x s32>) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<64 x s8>) + ; + ; AIE2P-LABEL: name: test_s8_to_s32_align4 + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<16 x s32>), align 4) + ; AIE2P-NEXT: [[BITCAST:%[0-9]+]]:_(<64 x s8>) = G_BITCAST [[LOAD]](<16 x s32>) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<64 x s8>) + %0:_(p0) = COPY $p0 + %1:_(<64 x s8>) = G_LOAD %0(p0) :: (dereferenceable load (<64 x s8>), align 4) + PseudoRET implicit $lr, implicit %1 +... 
+--- +name: test_s8_to_s16_align2 +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s8_to_s16_align2 + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s16>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<32 x s16>), align 2) + ; AIE2-NEXT: [[BITCAST:%[0-9]+]]:_(<64 x s8>) = G_BITCAST [[LOAD]](<32 x s16>) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<64 x s8>) + ; + ; AIE2P-LABEL: name: test_s8_to_s16_align2 + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s16>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<32 x s16>), align 2) + ; AIE2P-NEXT: [[BITCAST:%[0-9]+]]:_(<64 x s8>) = G_BITCAST [[LOAD]](<32 x s16>) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<64 x s8>) + %0:_(p0) = COPY $p0 + %1:_(<64 x s8>) = G_LOAD %0(p0) :: (dereferenceable load (<64 x s8>), align 2) + PseudoRET implicit $lr, implicit %1 +... 
+--- +name: test_s16_to_s64_align8 +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s16_to_s64_align8 + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<16 x s32>), align 8) + ; AIE2-NEXT: [[BITCAST:%[0-9]+]]:_(<32 x s16>) = G_BITCAST [[LOAD]](<16 x s32>) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<32 x s16>) + ; + ; AIE2P-LABEL: name: test_s16_to_s64_align8 + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<16 x s32>), align 8) + ; AIE2P-NEXT: [[BITCAST:%[0-9]+]]:_(<32 x s16>) = G_BITCAST [[LOAD]](<16 x s32>) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<32 x s16>) + %0:_(p0) = COPY $p0 + %1:_(<32 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<32 x s16>), align 8) + PseudoRET implicit $lr, implicit %1 +... 
+--- +name: test_s8_to_s64_align8 +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s8_to_s64_align8 + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<16 x s32>), align 8) + ; AIE2-NEXT: [[BITCAST:%[0-9]+]]:_(<64 x s8>) = G_BITCAST [[LOAD]](<16 x s32>) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<64 x s8>) + ; + ; AIE2P-LABEL: name: test_s8_to_s64_align8 + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<16 x s32>), align 8) + ; AIE2P-NEXT: [[BITCAST:%[0-9]+]]:_(<64 x s8>) = G_BITCAST [[LOAD]](<16 x s32>) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<64 x s8>) + %0:_(p0) = COPY $p0 + %1:_(<64 x s8>) = G_LOAD %0(p0) :: (dereferenceable load (<64 x s8>), align 8) + PseudoRET implicit $lr, implicit %1 +... 
+--- +name: test_already_aligned_not_combined +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_already_aligned_not_combined + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s16>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<32 x s16>)) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[LOAD]](<32 x s16>) + ; + ; AIE2P-LABEL: name: test_already_aligned_not_combined + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s16>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<32 x s16>)) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[LOAD]](<32 x s16>) + %0:_(p0) = COPY $p0 + %1:_(<32 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<32 x s16>), align 64) + PseudoRET implicit $lr, implicit %1 +... +--- +name: test_align1_not_combined +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_align1_not_combined + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s16>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<32 x s16>), align 1) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[LOAD]](<32 x s16>) + ; + ; AIE2P-LABEL: name: test_align1_not_combined + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s16>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<32 x s16>), align 1) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[LOAD]](<32 x s16>) + %0:_(p0) = COPY $p0 + %1:_(<32 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<32 x s16>), align 1) + PseudoRET implicit $lr, implicit %1 +... 
+--- +name: test_s32_vector_not_combined +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s32_vector_not_combined + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<16 x s32>), align 4) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[LOAD]](<16 x s32>) + ; + ; AIE2P-LABEL: name: test_s32_vector_not_combined + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<16 x s32>), align 4) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[LOAD]](<16 x s32>) + %0:_(p0) = COPY $p0 + %1:_(<16 x s32>) = G_LOAD %0(p0) :: (dereferenceable load (<16 x s32>), align 4) + PseudoRET implicit $lr, implicit %1 +... +--- +name: test_scalar_load_not_combined +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_scalar_load_not_combined + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (dereferenceable load (s16), align 4) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[LOAD]](s16) + ; + ; AIE2P-LABEL: name: test_scalar_load_not_combined + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (dereferenceable load (s16), align 4) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[LOAD]](s16) + %0:_(p0) = COPY $p0 + %1:_(s16) = G_LOAD %0(p0) :: (dereferenceable load (s16), align 4) + PseudoRET implicit $lr, implicit %1 +... 
+--- +name: test_small_vector_s16_to_s32 +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_small_vector_s16_to_s32 + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<4 x s32>), align 4) + ; AIE2-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<8 x s16>) + ; + ; AIE2P-LABEL: name: test_small_vector_s16_to_s32 + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<4 x s32>), align 4) + ; AIE2P-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<8 x s16>) + %0:_(p0) = COPY $p0 + %1:_(<8 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<8 x s16>), align 4) + PseudoRET implicit $lr, implicit %1 +... 
+--- +name: test_medium_vector_s16_to_s32 +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_medium_vector_s16_to_s32 + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<8 x s32>), align 4) + ; AIE2-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s16>) = G_BITCAST [[LOAD]](<8 x s32>) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<16 x s16>) + ; + ; AIE2P-LABEL: name: test_medium_vector_s16_to_s32 + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<8 x s32>), align 4) + ; AIE2P-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s16>) = G_BITCAST [[LOAD]](<8 x s32>) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<16 x s16>) + %0:_(p0) = COPY $p0 + %1:_(<16 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<16 x s16>), align 4) + PseudoRET implicit $lr, implicit %1 +... 
+--- +name: test_s16_align2_not_promoted_to_s32 +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s16_align2_not_promoted_to_s32 + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s16>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<32 x s16>), align 2) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[LOAD]](<32 x s16>) + ; + ; AIE2P-LABEL: name: test_s16_align2_not_promoted_to_s32 + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s16>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<32 x s16>), align 2) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[LOAD]](<32 x s16>) + %0:_(p0) = COPY $p0 + %1:_(<32 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<32 x s16>), align 2) + PseudoRET implicit $lr, implicit %1 +... +--- +name: test_large_s8_vector_align4 +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_large_s8_vector_align4 + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<32 x s32>), align 4) + ; AIE2-NEXT: [[BITCAST:%[0-9]+]]:_(<128 x s8>) = G_BITCAST [[LOAD]](<32 x s32>) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<128 x s8>) + ; + ; AIE2P-LABEL: name: test_large_s8_vector_align4 + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<32 x s32>), align 4) + ; AIE2P-NEXT: [[BITCAST:%[0-9]+]]:_(<128 x s8>) = G_BITCAST [[LOAD]](<32 x s32>) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<128 x s8>) + %0:_(p0) = COPY $p0 + %1:_(<128 x s8>) = G_LOAD %0(p0) :: (dereferenceable load (<128 x 
s8>), align 4) + PseudoRET implicit $lr, implicit %1 +... +--- +name: test_s16_align4_different_sizes +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s16_align4_different_sizes + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<2 x s32>), align 4) + ; AIE2-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s16>) = G_BITCAST [[LOAD]](<2 x s32>) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<4 x s16>) + ; + ; AIE2P-LABEL: name: test_s16_align4_different_sizes + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<2 x s32>), align 4) + ; AIE2P-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s16>) = G_BITCAST [[LOAD]](<2 x s32>) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<4 x s16>) + %0:_(p0) = COPY $p0 + %1:_(<4 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<4 x s16>), align 4) + PseudoRET implicit $lr, implicit %1 +... 
+--- +name: test_s8_small_vector_align2 +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s8_small_vector_align2 + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<8 x s16>), align 2) + ; AIE2-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<8 x s16>) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<16 x s8>) + ; + ; AIE2P-LABEL: name: test_s8_small_vector_align2 + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<8 x s16>), align 2) + ; AIE2P-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<8 x s16>) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<16 x s8>) + %0:_(p0) = COPY $p0 + %1:_(<16 x s8>) = G_LOAD %0(p0) :: (dereferenceable load (<16 x s8>), align 2) + PseudoRET implicit $lr, implicit %1 +... 
+--- +name: test_multiple_uses_of_load +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $p1 + + ; AIE2-LABEL: name: test_multiple_uses_of_load + ; AIE2: liveins: $p0, $p1 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<16 x s32>), align 4) + ; AIE2-NEXT: [[BITCAST:%[0-9]+]]:_(<32 x s16>) = G_BITCAST [[LOAD]](<16 x s32>) + ; AIE2-NEXT: G_STORE [[BITCAST]](<32 x s16>), [[COPY1]](p0) :: (store (<32 x s16>)) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<32 x s16>) + ; + ; AIE2P-LABEL: name: test_multiple_uses_of_load + ; AIE2P: liveins: $p0, $p1 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<16 x s32>), align 4) + ; AIE2P-NEXT: [[BITCAST:%[0-9]+]]:_(<32 x s16>) = G_BITCAST [[LOAD]](<16 x s32>) + ; AIE2P-NEXT: G_STORE [[BITCAST]](<32 x s16>), [[COPY1]](p0) :: (store (<32 x s16>)) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<32 x s16>) + %0:_(p0) = COPY $p0 + %1:_(p0) = COPY $p1 + %2:_(<32 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<32 x s16>), align 4) + G_STORE %2(<32 x s16>), %1(p0) :: (store (<32 x s16>)) + PseudoRET implicit $lr, implicit %2 +... 
+--- +name: test_s16_16elem_to_s32_align4 +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s16_16elem_to_s32_align4 + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<8 x s32>), align 4) + ; AIE2-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s16>) = G_BITCAST [[LOAD]](<8 x s32>) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<16 x s16>) + ; + ; AIE2P-LABEL: name: test_s16_16elem_to_s32_align4 + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<8 x s32>), align 4) + ; AIE2P-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s16>) = G_BITCAST [[LOAD]](<8 x s32>) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<16 x s16>) + %0:_(p0) = COPY $p0 + %1:_(<16 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<16 x s16>), align 4) + PseudoRET implicit $lr, implicit %1 +... 
+--- +name: test_s8_32elem_to_s32_align4 +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s8_32elem_to_s32_align4 + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<8 x s32>), align 4) + ; AIE2-NEXT: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<32 x s8>) + ; + ; AIE2P-LABEL: name: test_s8_32elem_to_s32_align4 + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<8 x s32>), align 4) + ; AIE2P-NEXT: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<32 x s8>) + %0:_(p0) = COPY $p0 + %1:_(<32 x s8>) = G_LOAD %0(p0) :: (dereferenceable load (<32 x s8>), align 4) + PseudoRET implicit $lr, implicit %1 +... 
+--- +name: test_load_store_same_alignment_not_combined +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $p1 + + ; AIE2-LABEL: name: test_load_store_same_alignment_not_combined + ; AIE2: liveins: $p0, $p1 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s16>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<32 x s16>), align 4) + ; AIE2-NEXT: G_STORE [[LOAD]](<32 x s16>), [[COPY1]](p0) :: (store (<32 x s16>), align 4) + ; AIE2-NEXT: PseudoRET implicit $lr + ; + ; AIE2P-LABEL: name: test_load_store_same_alignment_not_combined + ; AIE2P: liveins: $p0, $p1 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s16>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<32 x s16>), align 4) + ; AIE2P-NEXT: G_STORE [[LOAD]](<32 x s16>), [[COPY1]](p0) :: (store (<32 x s16>), align 4) + ; AIE2P-NEXT: PseudoRET implicit $lr + %0:_(p0) = COPY $p0 + %1:_(p0) = COPY $p1 + %2:_(<32 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<32 x s16>), align 4) + G_STORE %2(<32 x s16>), %1(p0) :: (store (<32 x s16>), align 4) + PseudoRET implicit $lr +... 
+--- +name: test_load_store_different_alignment_combined +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $p1 + + ; AIE2-LABEL: name: test_load_store_different_alignment_combined + ; AIE2: liveins: $p0, $p1 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<16 x s32>), align 4) + ; AIE2-NEXT: [[BITCAST:%[0-9]+]]:_(<32 x s16>) = G_BITCAST [[LOAD]](<16 x s32>) + ; AIE2-NEXT: G_STORE [[BITCAST]](<32 x s16>), [[COPY1]](p0) :: (store (<32 x s16>), align 2) + ; AIE2-NEXT: PseudoRET implicit $lr + ; + ; AIE2P-LABEL: name: test_load_store_different_alignment_combined + ; AIE2P: liveins: $p0, $p1 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<16 x s32>), align 4) + ; AIE2P-NEXT: [[BITCAST:%[0-9]+]]:_(<32 x s16>) = G_BITCAST [[LOAD]](<16 x s32>) + ; AIE2P-NEXT: G_STORE [[BITCAST]](<32 x s16>), [[COPY1]](p0) :: (store (<32 x s16>), align 2) + ; AIE2P-NEXT: PseudoRET implicit $lr + %0:_(p0) = COPY $p0 + %1:_(p0) = COPY $p1 + %2:_(<32 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<32 x s16>), align 4) + G_STORE %2(<32 x s16>), %1(p0) :: (store (<32 x s16>), align 2) + PseudoRET implicit $lr +... 
+--- +name: test_load_only_for_extracts_not_combined +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_load_only_for_extracts_not_combined + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 10 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s16)) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[ZEXTLOAD]](s32) + ; + ; AIE2P-LABEL: name: test_load_only_for_extracts_not_combined + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 10 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s16)) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[ZEXTLOAD]](s32) + %0:_(p0) = COPY $p0 + %1:_(<32 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<32 x s16>), align 4) + %2:_(s32) = G_CONSTANT i32 5 + %3:_(s16) = G_EXTRACT_VECTOR_ELT %1(<32 x s16>), %2(s32) + %4:_(s32) = G_ZEXT %3(s16) + PseudoRET implicit $lr, implicit %4(s32) +... 
+--- +name: test_load_with_mixed_uses_combined +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $p1 + + ; AIE2-LABEL: name: test_load_with_mixed_uses_combined + ; AIE2: liveins: $p0, $p1 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<16 x s32>), align 4) + ; AIE2-NEXT: [[BITCAST:%[0-9]+]]:_(<32 x s16>) = G_BITCAST [[LOAD]](<16 x s32>) + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; AIE2-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<32 x s16>), [[C]](s32) + ; AIE2-NEXT: G_STORE [[BITCAST]](<32 x s16>), [[COPY1]](p0) :: (store (<32 x s16>)) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[EVEC]](s16) + ; + ; AIE2P-LABEL: name: test_load_with_mixed_uses_combined + ; AIE2P: liveins: $p0, $p1 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<16 x s32>), align 4) + ; AIE2P-NEXT: [[BITCAST:%[0-9]+]]:_(<32 x s16>) = G_BITCAST [[LOAD]](<16 x s32>) + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; AIE2P-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<32 x s16>), [[C]](s32) + ; AIE2P-NEXT: G_STORE [[BITCAST]](<32 x s16>), [[COPY1]](p0) :: (store (<32 x s16>)) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[EVEC]](s16) + %0:_(p0) = COPY $p0 + %1:_(p0) = COPY $p1 + %2:_(<32 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<32 x s16>), align 4) + %3:_(s32) = G_CONSTANT i32 5 + %4:_(s16) = G_EXTRACT_VECTOR_ELT %2(<32 x s16>), %3(s32) + G_STORE %2(<32 x s16>), %1(p0) :: (store (<32 x s16>)) + PseudoRET implicit $lr, implicit %4(s16) +... 
diff --git a/llvm/test/CodeGen/AIE/GlobalISel/combiner-unaligned-extract-load.mir b/llvm/test/CodeGen/AIE/GlobalISel/combiner-unaligned-extract-load.mir new file mode 100644 index 000000000000..63ee769a0c4f --- /dev/null +++ b/llvm/test/CodeGen/AIE/GlobalISel/combiner-unaligned-extract-load.mir @@ -0,0 +1,826 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates +# RUN: llc -mtriple=aie2 -O0 -run-pass=aie2-prelegalizer-combiner -global-isel %s -o - | FileCheck %s --check-prefix=AIE2 +# RUN: llc -mtriple=aie2p -O0 -run-pass=aie2p-prelegalizer-combiner -global-isel %s -o - | FileCheck %s --check-prefix=AIE2P + +--- +name: test_unaligned_vec_load_to_byte_loads +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_unaligned_vec_load_to_byte_loads + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 2 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s8)) + ; AIE2-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 4 + ; AIE2-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s20) + ; AIE2-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s8)) + ; AIE2-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[ZEXTLOAD1]], 8 + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s32), implicit [[ZEXTLOAD]](s32) + ; + ; AIE2P-LABEL: name: test_unaligned_vec_load_to_byte_loads + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY 
$p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 2 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s8)) + ; AIE2P-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 4 + ; AIE2P-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s20) + ; AIE2P-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s8)) + ; AIE2P-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[ZEXTLOAD1]], 8 + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s32), implicit [[ZEXTLOAD]](s32) + %0:_(p0) = COPY $p0 + %14:_(<8 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<8 x s16>), align 1) + %15:_(<16 x s8>) = G_BITCAST %14(<8 x s16>) + %17:_(s32) = G_CONSTANT i32 4 + %16:_(s8) = G_EXTRACT_VECTOR_ELT %15(<16 x s8>), %17(s32) + %18:_(s32) = G_ZEXT %16(s8) + %158:_(<64 x s8>) = G_AIE_PAD_VECTOR_UNDEF %15(<16 x s8>) + %23:_(s32) = G_CONSTANT i32 2 + %156:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %158(<64 x s8>), %23(s32) + PseudoRET implicit $lr, implicit %18, implicit %156 +... 
+--- +name: test_aligned_load_not_combined +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_aligned_load_not_combined + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 4 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s8), align 4) + ; AIE2-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[ZEXTLOAD]], 8 + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s32) + ; + ; AIE2P-LABEL: name: test_aligned_load_not_combined + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 4 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s8), align 4) + ; AIE2P-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[ZEXTLOAD]], 8 + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s32) + %0:_(p0) = COPY $p0 + %14:_(<8 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<8 x s16>), align 4) + %15:_(<16 x s8>) = G_BITCAST %14(<8 x s16>) + %17:_(s32) = G_CONSTANT i32 4 + %16:_(s8) = G_EXTRACT_VECTOR_ELT %15(<16 x s8>), %17(s32) + %18:_(s32) = G_ZEXT %16(s8) + PseudoRET implicit $lr, implicit %18 +... 
+--- +name: test_load_with_other_uses_not_combined +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $p1 + + ; AIE2-LABEL: name: test_load_with_other_uses_not_combined + ; AIE2: liveins: $p0, $p1 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<8 x s16>), align 2) + ; AIE2-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<8 x s16>) + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; AIE2-NEXT: [[AIE_PAD_VECTOR_UNDEF:%[0-9]+]]:_(<64 x s8>) = G_AIE_PAD_VECTOR_UNDEF [[BITCAST]](<16 x s8>) + ; AIE2-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[AIE_PAD_VECTOR_UNDEF]](<64 x s8>), [[C]](s32) + ; AIE2-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[AIE_ZEXT_EXTRACT_VECTOR_ELT]], 8 + ; AIE2-NEXT: G_STORE [[LOAD]](<8 x s16>), [[COPY1]](p0) :: (store (<8 x s16>)) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s32) + ; + ; AIE2P-LABEL: name: test_load_with_other_uses_not_combined + ; AIE2P: liveins: $p0, $p1 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (dereferenceable load (<8 x s16>), align 2) + ; AIE2P-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<8 x s16>) + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; AIE2P-NEXT: [[AIE_PAD_VECTOR_UNDEF:%[0-9]+]]:_(<64 x s8>) = G_AIE_PAD_VECTOR_UNDEF [[BITCAST]](<16 x s8>) + ; AIE2P-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[AIE_PAD_VECTOR_UNDEF]](<64 x s8>), [[C]](s32) + ; AIE2P-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[AIE_ZEXT_EXTRACT_VECTOR_ELT]], 8 + ; AIE2P-NEXT: G_STORE [[LOAD]](<8 x s16>), [[COPY1]](p0) :: (store (<8 x s16>)) + ; 
AIE2P-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s32) + %0:_(p0) = COPY $p0 + %1:_(p0) = COPY $p1 + %14:_(<8 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<8 x s16>), align 2) + %15:_(<16 x s8>) = G_BITCAST %14(<8 x s16>) + %17:_(s32) = G_CONSTANT i32 4 + %16:_(s8) = G_EXTRACT_VECTOR_ELT %15(<16 x s8>), %17(s32) + %18:_(s32) = G_ZEXT %16(s8) + G_STORE %14(<8 x s16>), %1(p0) :: (store (<8 x s16>)) + PseudoRET implicit $lr, implicit %18 +... +--- +name: test_plain_extract_without_zext +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_plain_extract_without_zext + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 20 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s32), align 2) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[LOAD]](s32) + ; + ; AIE2P-LABEL: name: test_plain_extract_without_zext + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 20 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s32), align 2) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[LOAD]](s32) + %0:_(p0) = COPY $p0 + %14:_(<8 x s32>) = G_LOAD %0(p0) :: (dereferenceable load (<8 x s32>), align 2) + %17:_(s32) = G_CONSTANT i32 5 + %16:_(s32) = G_EXTRACT_VECTOR_ELT %14(<8 x s32>), %17(s32) + PseudoRET implicit $lr, implicit %16 +... 
+--- +name: test_direct_pad_s8_extract +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_direct_pad_s8_extract + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 5 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s8)) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[ZEXTLOAD]](s32) + ; + ; AIE2P-LABEL: name: test_direct_pad_s8_extract + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 5 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s8)) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[ZEXTLOAD]](s32) + %0:_(p0) = COPY $p0 + %14:_(<16 x s8>) = G_LOAD %0(p0) :: (dereferenceable load (<16 x s8>), align 2) + %158:_(<64 x s8>) = G_AIE_PAD_VECTOR_UNDEF %14(<16 x s8>) + %23:_(s32) = G_CONSTANT i32 5 + %156:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %158(<64 x s8>), %23(s32) + PseudoRET implicit $lr, implicit %156 +... 
+--- +name: test_direct_pad_s16_extract +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_direct_pad_s16_extract + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 6 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s16), align 1) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[ZEXTLOAD]](s32) + ; + ; AIE2P-LABEL: name: test_direct_pad_s16_extract + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 6 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s16), align 1) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[ZEXTLOAD]](s32) + %0:_(p0) = COPY $p0 + %14:_(<8 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<8 x s16>), align 1) + %158:_(<32 x s16>) = G_AIE_PAD_VECTOR_UNDEF %14(<8 x s16>) + %23:_(s32) = G_CONSTANT i32 3 + %156:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %158(<32 x s16>), %23(s32) + PseudoRET implicit $lr, implicit %156 +... 
+--- +name: test_direct_pad_s32_extract +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_direct_pad_s32_extract + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s32), align 2) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[LOAD]](s32) + ; + ; AIE2P-LABEL: name: test_direct_pad_s32_extract + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s32), align 2) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[LOAD]](s32) + %0:_(p0) = COPY $p0 + %14:_(<4 x s32>) = G_LOAD %0(p0) :: (dereferenceable load (<4 x s32>), align 2) + %158:_(<16 x s32>) = G_AIE_PAD_VECTOR_UNDEF %14(<4 x s32>) + %23:_(s32) = G_CONSTANT i32 2 + %156:_(s32) = G_EXTRACT_VECTOR_ELT %158(<16 x s32>), %23(s32) + PseudoRET implicit $lr, implicit %156 +... 
+--- +name: test_sext_extract_s8 +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_sext_extract_s8 + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 3 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s8)) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[SEXTLOAD]](s32) + ; + ; AIE2P-LABEL: name: test_sext_extract_s8 + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 3 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s8)) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[SEXTLOAD]](s32) + %0:_(p0) = COPY $p0 + %14:_(<8 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<8 x s16>), align 2) + %15:_(<16 x s8>) = G_BITCAST %14(<8 x s16>) + %158:_(<64 x s8>) = G_AIE_PAD_VECTOR_UNDEF %15(<16 x s8>) + %23:_(s32) = G_CONSTANT i32 3 + %156:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %158(<64 x s8>), %23(s32) + PseudoRET implicit $lr, implicit %156 +... 
+--- +name: test_sext_extract_s16 +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_sext_extract_s16 + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s16), align 1) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[SEXTLOAD]](s32) + ; + ; AIE2P-LABEL: name: test_sext_extract_s16 + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s16), align 1) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[SEXTLOAD]](s32) + %0:_(p0) = COPY $p0 + %14:_(<4 x s32>) = G_LOAD %0(p0) :: (dereferenceable load (<4 x s32>), align 1) + %15:_(<8 x s16>) = G_BITCAST %14(<4 x s32>) + %158:_(<32 x s16>) = G_AIE_PAD_VECTOR_UNDEF %15(<8 x s16>) + %23:_(s32) = G_CONSTANT i32 4 + %156:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %158(<32 x s16>), %23(s32) + PseudoRET implicit $lr, implicit %156 + +... 
+--- +name: test_multiple_extracts_from_same_load +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_multiple_extracts_from_same_load + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 10 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s8), align 2) + ; AIE2-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 5 + ; AIE2-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s20) + ; AIE2-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s8)) + ; AIE2-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (dereferenceable load (s8), align 2) + ; AIE2-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[ZEXTLOAD2]], 8 + ; AIE2-NEXT: [[ASSERT_ZEXT1:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[ZEXTLOAD1]], 8 + ; AIE2-NEXT: [[ASSERT_ZEXT2:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[ZEXTLOAD]], 8 + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s32), implicit [[ASSERT_ZEXT1]](s32), implicit [[ASSERT_ZEXT2]](s32) + ; + ; AIE2P-LABEL: name: test_multiple_extracts_from_same_load + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 10 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s8), align 2) + ; AIE2P-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 5 + ; AIE2P-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s20) + ; AIE2P-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s8)) + ; AIE2P-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (dereferenceable load (s8), align 2) + ; 
AIE2P-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[ZEXTLOAD2]], 8 + ; AIE2P-NEXT: [[ASSERT_ZEXT1:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[ZEXTLOAD1]], 8 + ; AIE2P-NEXT: [[ASSERT_ZEXT2:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[ZEXTLOAD]], 8 + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s32), implicit [[ASSERT_ZEXT1]](s32), implicit [[ASSERT_ZEXT2]](s32) + %0:_(p0) = COPY $p0 + %14:_(<8 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<8 x s16>), align 2) + %15:_(<16 x s8>) = G_BITCAST %14(<8 x s16>) + %17:_(s32) = G_CONSTANT i32 0 + %16:_(s8) = G_EXTRACT_VECTOR_ELT %15(<16 x s8>), %17(s32) + %18:_(s32) = G_ZEXT %16(s8) + %19:_(s32) = G_CONSTANT i32 5 + %20:_(s8) = G_EXTRACT_VECTOR_ELT %15(<16 x s8>), %19(s32) + %21:_(s32) = G_ZEXT %20(s8) + %22:_(s32) = G_CONSTANT i32 10 + %23:_(s8) = G_EXTRACT_VECTOR_ELT %15(<16 x s8>), %22(s32) + %24:_(s32) = G_ZEXT %23(s8) + PseudoRET implicit $lr, implicit %18, implicit %21, implicit %24 +... +--- +name: test_extract_with_pad_vector +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_extract_with_pad_vector + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 7 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s8)) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[ZEXTLOAD]](s32) + ; + ; AIE2P-LABEL: name: test_extract_with_pad_vector + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 7 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s8)) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[ZEXTLOAD]](s32) + %0:_(p0) = COPY $p0 + 
%14:_(<8 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<8 x s16>), align 2) + %15:_(<16 x s8>) = G_BITCAST %14(<8 x s16>) + %158:_(<64 x s8>) = G_AIE_PAD_VECTOR_UNDEF %15(<16 x s8>) + %23:_(s32) = G_CONSTANT i32 7 + %156:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %158(<64 x s8>), %23(s32) + PseudoRET implicit $lr, implicit %156 +... +--- +name: test_mixed_plain_and_zext_extracts +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_mixed_plain_and_zext_extracts + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s8), align 2) + ; AIE2-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 1 + ; AIE2-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s20) + ; AIE2-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s8)) + ; AIE2-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[ZEXTLOAD1]], 8 + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s32), implicit [[ZEXTLOAD]](s32) + ; + ; AIE2P-LABEL: name: test_mixed_plain_and_zext_extracts + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s8), align 2) + ; AIE2P-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 1 + ; AIE2P-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s20) + ; AIE2P-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s8)) + ; AIE2P-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[ZEXTLOAD1]], 8 + ; AIE2P-NEXT: 
PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s32), implicit [[ZEXTLOAD]](s32) + %0:_(p0) = COPY $p0 + %14:_(<8 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<8 x s16>), align 2) + %15:_(<16 x s8>) = G_BITCAST %14(<8 x s16>) + %17:_(s32) = G_CONSTANT i32 1 + %16:_(s8) = G_EXTRACT_VECTOR_ELT %15(<16 x s8>), %17(s32) + %18:_(s32) = G_ZEXT %16(s8) + %158:_(<64 x s8>) = G_AIE_PAD_VECTOR_UNDEF %15(<16 x s8>) + %23:_(s32) = G_CONSTANT i32 8 + %156:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %158(<64 x s8>), %23(s32) + PseudoRET implicit $lr, implicit %18, implicit %156 +... +--- +name: test_s8_extract_from_align2_load +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s8_extract_from_align2_load + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 2 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s8), align 2) + ; AIE2-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 4 + ; AIE2-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s20) + ; AIE2-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s8), align 2) + ; AIE2-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[ZEXTLOAD1]], 8 + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s32), implicit [[ZEXTLOAD]](s32) + ; + ; AIE2P-LABEL: name: test_s8_extract_from_align2_load + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 2 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s8), align 2) + ; AIE2P-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 4 + ; AIE2P-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s20) + ; AIE2P-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s8), align 2) + ; AIE2P-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[ZEXTLOAD1]], 8 + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s32), implicit [[ZEXTLOAD]](s32) + %0:_(p0) = COPY $p0 + %14:_(<8 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<8 x s16>), align 2) + %15:_(<16 x s8>) = G_BITCAST %14(<8 x s16>) + %17:_(s32) = G_CONSTANT i32 4 + %16:_(s8) = G_EXTRACT_VECTOR_ELT %15(<16 x s8>), %17(s32) + %18:_(s32) = G_ZEXT %16(s8) + %158:_(<64 x s8>) = G_AIE_PAD_VECTOR_UNDEF %15(<16 x s8>) + %23:_(s32) = G_CONSTANT i32 2 + %156:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %158(<64 x s8>), %23(s32) + PseudoRET implicit $lr, implicit %18, implicit %156 +... +--- +name: test_s16_extract_unaligned +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s16_extract_unaligned + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 4 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s16), align 1) + ; AIE2-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[ZEXTLOAD]], 16 + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s32) + ; + ; AIE2P-LABEL: name: test_s16_extract_unaligned + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 4 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s16), align 1) + ; AIE2P-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[ZEXTLOAD]], 16 + ; AIE2P-NEXT: PseudoRET 
implicit $lr, implicit [[ASSERT_ZEXT]](s32) + %0:_(p0) = COPY $p0 + %14:_(<4 x s32>) = G_LOAD %0(p0) :: (dereferenceable load (<4 x s32>), align 1) + %15:_(<8 x s16>) = G_BITCAST %14(<4 x s32>) + %17:_(s32) = G_CONSTANT i32 2 + %16:_(s16) = G_EXTRACT_VECTOR_ELT %15(<8 x s16>), %17(s32) + %18:_(s32) = G_ZEXT %16(s16) + PseudoRET implicit $lr, implicit %18 +... +--- +name: test_s16_direct_vector_extract +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s16_direct_vector_extract + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 6 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s16), align 1) + ; AIE2-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[ZEXTLOAD]], 16 + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s32) + ; + ; AIE2P-LABEL: name: test_s16_direct_vector_extract + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 6 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s16), align 1) + ; AIE2P-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[ZEXTLOAD]], 16 + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s32) + %0:_(p0) = COPY $p0 + %14:_(<8 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<8 x s16>), align 1) + %17:_(s32) = G_CONSTANT i32 3 + %16:_(s16) = G_EXTRACT_VECTOR_ELT %14(<8 x s16>), %17(s32) + %18:_(s32) = G_ZEXT %16(s16) + PseudoRET implicit $lr, implicit %18 +... 
+--- +name: test_s32_extract_from_bitcast +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s32_extract_from_bitcast + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 12 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s32), align 2) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[LOAD]](s32) + ; + ; AIE2P-LABEL: name: test_s32_extract_from_bitcast + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 12 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s32), align 2) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[LOAD]](s32) + %0:_(p0) = COPY $p0 + %14:_(<4 x s64>) = G_LOAD %0(p0) :: (dereferenceable load (<4 x s64>), align 2) + %15:_(<8 x s32>) = G_BITCAST %14(<4 x s64>) + %17:_(s32) = G_CONSTANT i32 3 + %16:_(s32) = G_EXTRACT_VECTOR_ELT %15(<8 x s32>), %17(s32) + PseudoRET implicit $lr, implicit %16 +... 
+--- +name: test_s32_direct_vector_extract +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s32_direct_vector_extract + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 20 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s32), align 2) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[LOAD]](s32) + ; + ; AIE2P-LABEL: name: test_s32_direct_vector_extract + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 20 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s32), align 2) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[LOAD]](s32) + %0:_(p0) = COPY $p0 + %14:_(<8 x s32>) = G_LOAD %0(p0) :: (dereferenceable load (<8 x s32>), align 2) + %17:_(s32) = G_CONSTANT i32 5 + %16:_(s32) = G_EXTRACT_VECTOR_ELT %14(<8 x s32>), %17(s32) + PseudoRET implicit $lr, implicit %16 +... 
+--- +name: test_s16_32elem_direct_zext_extract +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s16_32elem_direct_zext_extract + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 20 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s16), align 4) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[ZEXTLOAD]](s32) + ; + ; AIE2P-LABEL: name: test_s16_32elem_direct_zext_extract + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 20 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s16), align 4) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[ZEXTLOAD]](s32) + %0:_(p0) = COPY $p0 + %14:_(<32 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<32 x s16>), align 4) + %17:_(s32) = G_CONSTANT i32 10 + %156:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %14(<32 x s16>), %17(s32) + PseudoRET implicit $lr, implicit %156 +... 
+--- +name: test_s16_32elem_direct_sext_extract +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s16_32elem_direct_sext_extract + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 30 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s16)) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[SEXTLOAD]](s32) + ; + ; AIE2P-LABEL: name: test_s16_32elem_direct_sext_extract + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 30 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s16)) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[SEXTLOAD]](s32) + %0:_(p0) = COPY $p0 + %14:_(<32 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<32 x s16>), align 4) + %17:_(s32) = G_CONSTANT i32 15 + %156:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %14(<32 x s16>), %17(s32) + PseudoRET implicit $lr, implicit %156 +... 
+--- +name: test_s16_32elem_bitcast_extract +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s16_32elem_bitcast_extract + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 12 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s8), align 4) + ; AIE2-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[ZEXTLOAD]], 8 + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s32) + ; + ; AIE2P-LABEL: name: test_s16_32elem_bitcast_extract + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 12 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s8), align 4) + ; AIE2P-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[ZEXTLOAD]], 8 + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s32) + %0:_(p0) = COPY $p0 + %14:_(<32 x s16>) = G_LOAD %0(p0) :: (dereferenceable load (<32 x s16>), align 4) + %15:_(<64 x s8>) = G_BITCAST %14(<32 x s16>) + %17:_(s32) = G_CONSTANT i32 12 + %16:_(s8) = G_EXTRACT_VECTOR_ELT %15(<64 x s8>), %17(s32) + %18:_(s32) = G_ZEXT %16(s8) + PseudoRET implicit $lr, implicit %18 +... 
+--- +name: test_s32_16elem_direct_extract +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s32_16elem_direct_extract + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 28 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32)) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[LOAD]](s32) + ; + ; AIE2P-LABEL: name: test_s32_16elem_direct_extract + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 28 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32)) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[LOAD]](s32) + %0:_(p0) = COPY $p0 + %14:_(<16 x s32>) = G_LOAD %0(p0) :: (dereferenceable load (<16 x s32>), align 4) + %17:_(s32) = G_CONSTANT i32 7 + %16:_(s32) = G_EXTRACT_VECTOR_ELT %14(<16 x s32>), %17(s32) + PseudoRET implicit $lr, implicit %16 +... 
+--- +name: test_s32_16elem_bitcast_zext_extract +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s32_16elem_bitcast_zext_extract + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 18 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s16)) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[ZEXTLOAD]](s32) + ; + ; AIE2P-LABEL: name: test_s32_16elem_bitcast_zext_extract + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 18 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s16)) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[ZEXTLOAD]](s32) + %0:_(p0) = COPY $p0 + %14:_(<16 x s32>) = G_LOAD %0(p0) :: (dereferenceable load (<16 x s32>), align 4) + %15:_(<32 x s16>) = G_BITCAST %14(<16 x s32>) + %17:_(s32) = G_CONSTANT i32 9 + %156:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %15(<32 x s16>), %17(s32) + PseudoRET implicit $lr, implicit %156 +... 
+--- +name: test_s32_16elem_bitcast_sext_extract +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + + ; AIE2-LABEL: name: test_s32_16elem_bitcast_sext_extract + ; AIE2: liveins: $p0 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 24 + ; AIE2-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s16), align 4) + ; AIE2-NEXT: PseudoRET implicit $lr, implicit [[SEXTLOAD]](s32) + ; + ; AIE2P-LABEL: name: test_s32_16elem_bitcast_sext_extract + ; AIE2P: liveins: $p0 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; AIE2P-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 24 + ; AIE2P-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; AIE2P-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p0) :: (dereferenceable load (s16), align 4) + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit [[SEXTLOAD]](s32) + %0:_(p0) = COPY $p0 + %14:_(<16 x s32>) = G_LOAD %0(p0) :: (dereferenceable load (<16 x s32>), align 4) + %15:_(<32 x s16>) = G_BITCAST %14(<16 x s32>) + %17:_(s32) = G_CONSTANT i32 12 + %156:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %15(<32 x s16>), %17(s32) + PseudoRET implicit $lr, implicit %156 +... 
diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/prelegalizer-combiner-extracted-vector-load.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/prelegalizer-combiner-extracted-vector-load.mir index 549514967300..0aca68a32ff5 100644 --- a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/prelegalizer-combiner-extracted-vector-load.mir +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/prelegalizer-combiner-extracted-vector-load.mir @@ -47,8 +47,9 @@ body: | ; CHECK: liveins: $p0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<32 x s16>) = G_AIE_BROADCAST_VECTOR [[LOAD]](s32) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16), align 4) + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[SEXTLOAD]], 16 + ; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<32 x s16>) = G_AIE_BROADCAST_VECTOR [[ASSERT_SEXT]](s32) ; CHECK-NEXT: G_STORE [[AIE_BROADCAST_VECTOR]](<32 x s16>), [[COPY]](p0) :: (store (<32 x s16>) into `ptr addrspace(6) null`, addrspace 6) ; CHECK-NEXT: PseudoRET implicit $lr %3:_(<8 x s16>) = G_IMPLICIT_DEF @@ -75,8 +76,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 6 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16)) - ; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<32 x s16>) = G_AIE_BROADCAST_VECTOR [[LOAD]](s32) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p0) :: (load (s16) from `ptr null`) + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[SEXTLOAD]], 16 + ; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<32 x s16>) = G_AIE_BROADCAST_VECTOR [[ASSERT_SEXT]](s32) ; CHECK-NEXT: G_STORE [[AIE_BROADCAST_VECTOR]](<32 x s16>), [[COPY]](p0) :: (store (<32 x s16>) into `ptr 
addrspace(6) null`, addrspace 6) ; CHECK-NEXT: PseudoRET implicit $lr %3:_(<8 x s16>) = G_IMPLICIT_DEF