@@ -4498,3 +4498,118 @@ bool llvm::matchUnalignedExtractLoad(MachineInstr &ExtractMI,
44984498
44994499 return true ;
45004500}
4501+
4502+ // / Match unaligned vector loads and transform them to use a better-aligned
4503+ // / element type based on the actual alignment.
4504+ // / Pattern:
4505+ // / %vec:_(<32 x s16>) = G_LOAD %ptr(p0) :: (align 4)
4506+ // / Converts to:
4507+ // / %vec_new:_(<16 x s32>) = G_LOAD %ptr(p0) :: (align 4)
4508+ // / %vec:_(<32 x s16>) = G_BITCAST %vec_new(<16 x s32>)
4509+ bool llvm::matchUnalignedVectorLoad (MachineInstr &LoadMI,
4510+ MachineRegisterInfo &MRI,
4511+ GISelChangeObserver &Observer,
4512+ BuildFnTy &MatchInfo) {
4513+ assert (LoadMI.getOpcode () == TargetOpcode::G_LOAD && " Expected G_LOAD" );
4514+
4515+ // Get load information
4516+ const Register DstReg = LoadMI.getOperand (0 ).getReg ();
4517+ const LLT DstTy = MRI.getType (DstReg);
4518+
4519+ // Only process vector loads
4520+ if (!DstTy.isVector ())
4521+ return false ;
4522+
4523+ // Check memory operand for alignment
4524+ if (LoadMI.memoperands_empty ())
4525+ return false ;
4526+
4527+ const MachineMemOperand *MMO = LoadMI.memoperands ().front ();
4528+ const unsigned Alignment = MMO->getAlign ().value ();
4529+
4530+ // Skip if the vector is already well-aligned (alignment >= vector size)
4531+ const unsigned VecSizeInBytes = DstTy.getSizeInBytes ();
4532+ if (Alignment >= VecSizeInBytes)
4533+ return false ;
4534+
4535+ // Get element type information
4536+ const LLT ElemTy = DstTy.getElementType ();
4537+ const unsigned ElemSizeInBits = ElemTy.getSizeInBits ();
4538+
4539+ // Skip if the load is only used for extracts - let matchUnalignedExtractLoad
4540+ // handle it. This prevents the two combiners from competing for the same
4541+ // opportunities
4542+ const MachineFunction &MF = *LoadMI.getMF ();
4543+ const AIEBaseInstrInfo &TII =
4544+ *static_cast <const AIEBaseInstrInfo *>(MF.getSubtarget ().getInstrInfo ());
4545+ const unsigned ZExtExtractOpcode =
4546+ TII.getGenericExtractVectorEltOpcode (false );
4547+ const unsigned SExtExtractOpcode = TII.getGenericExtractVectorEltOpcode (true );
4548+ const unsigned PadVectorOpcode = TII.getGenericPadVectorOpcode ();
4549+
4550+ if (areLoadUsesValidForExtractCombine (
4551+ DstReg, ZExtExtractOpcode, SExtExtractOpcode, PadVectorOpcode, MRI))
4552+ return false ;
4553+
4554+ // Skip if the load has a single user that is a G_STORE with the same
4555+ // alignment. This case can be perfectly scalarized during legalization
4556+ if (MRI.hasOneNonDBGUse (DstReg)) {
4557+ const MachineInstr *UserMI = &*MRI.use_instr_nodbg_begin (DstReg);
4558+ if (UserMI->getOpcode () == TargetOpcode::G_STORE) {
4559+ const GStore *StoreMI = cast<GStore>(UserMI);
4560+ if (!StoreMI->memoperands_empty ()) {
4561+ const MachineMemOperand *StoreMMO = StoreMI->memoperands ().front ();
4562+ // If store has the same alignment as the load, skip
4563+ if (StoreMMO->getAlign ().value () == Alignment)
4564+ return false ;
4565+ }
4566+ }
4567+ }
4568+
4569+ // We already have the best element size option.
4570+ if (Alignment == ElemSizeInBits / 8 )
4571+ return false ;
4572+
4573+ // Only handle s8 and s16 element types that can be promoted to s32
4574+ if (ElemSizeInBits != 8 && ElemSizeInBits != 16 )
4575+ return false ;
4576+
4577+ // Determine the optimal element type based on alignment
4578+ unsigned NewElemSizeInBits = 0 ;
4579+ if (Alignment >= 4 ) {
4580+ NewElemSizeInBits = 32 ;
4581+ } else if (Alignment >= 2 ) {
4582+ NewElemSizeInBits = 16 ;
4583+ } else {
4584+ // Alignment doesn't allow for a better element type
4585+ return false ;
4586+ }
4587+
4588+ // Check if the vector size is compatible with the new element size
4589+ const unsigned VecSizeInBits = DstTy.getSizeInBits ();
4590+ if (VecSizeInBits % NewElemSizeInBits != 0 )
4591+ return false ;
4592+
4593+ MatchInfo = [=, PtrReg = LoadMI.getOperand (1 ).getReg (), &MRI,
4594+ &Observer](MachineIRBuilder &B) {
4595+ MachineFunction &MF = B.getMF ();
4596+
4597+ // Calculate new number of elements
4598+ const unsigned NewNumElems = VecSizeInBits / NewElemSizeInBits;
4599+
4600+ // Create the new vector type with better-aligned elements
4601+ const LLT NewVecTy = LLT::fixed_vector (NewNumElems, NewElemSizeInBits);
4602+ const Register NewLoadReg = MRI.createGenericVirtualRegister (NewVecTy);
4603+
4604+ // Create a new MMO with the same properties but updated type
4605+ MachineMemOperand *NewMMO = MF.getMachineMemOperand (
4606+ MMO->getPointerInfo (), MMO->getFlags (), NewVecTy, MMO->getAlign ());
4607+
4608+ Observer.createdInstr (*B.buildLoad (NewLoadReg, PtrReg, *NewMMO));
4609+
4610+ // Bitcast back to the original type
4611+ Observer.createdInstr (*B.buildBitcast (DstReg, NewLoadReg));
4612+ };
4613+
4614+ return true ;
4615+ }
0 commit comments