@@ -456,6 +456,109 @@ static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
456456 ElseTerm->eraseFromParent ();
457457}
458458
459+ static void createMemSetPatternLoop (Instruction *InsertBefore, Value *DstAddr,
460+ Value *CopyLen, Value *SetValue,
461+ Align DstAlign, bool IsVolatile) {
462+ BasicBlock *OrigBB = InsertBefore->getParent ();
463+ Function *F = OrigBB->getParent ();
464+ const DataLayout &DL = F->getDataLayout ();
465+
466+ if (DL.isBigEndian ())
467+ report_fatal_error (" memset_pattern.inline expansion not currently "
468+ " implemented for big-endian targets" ,
469+ false );
470+
471+ // To start with, let's assume SetValue is an i128 and bail out if it's not.
472+ if (!isPowerOf2_32 (SetValue->getType ()->getScalarSizeInBits ()))
473+ report_fatal_error (" Pattern width for memset_pattern must be a power of 2" ,
474+ false );
475+ unsigned PatternSize = SetValue->getType ()->getScalarSizeInBits () / 8 ;
476+
477+ Type *TypeOfCopyLen = CopyLen->getType ();
478+
479+ BasicBlock *NewBB = OrigBB->splitBasicBlock (InsertBefore, " split" );
480+ BasicBlock *LoopBB =
481+ BasicBlock::Create (F->getContext (), " storeloop" , F, NewBB);
482+ BasicBlock *RemCheckBB =
483+ BasicBlock::Create (F->getContext (), " remcheck" , F, NewBB);
484+ BasicBlock *RemainderLoopBB =
485+ BasicBlock::Create (F->getContext (), " remainderloop" , F, NewBB);
486+ IRBuilder<> Builder (OrigBB->getTerminator ());
487+
488+ ConstantInt *CILoopOpSize =
489+ ConstantInt::get (dyn_cast<IntegerType>(TypeOfCopyLen), PatternSize);
490+ Value *RuntimeLoopCount =
491+ getRuntimeLoopCount (DL, Builder, CopyLen, CILoopOpSize, PatternSize);
492+ Value *RuntimeRemainder =
493+ getRuntimeLoopRemainder (DL, Builder, CopyLen, CILoopOpSize, PatternSize);
494+
495+ Builder.CreateCondBr (Builder.CreateICmpEQ (ConstantInt::get (TypeOfCopyLen, 0 ),
496+ RuntimeLoopCount),
497+ RemCheckBB, LoopBB);
498+ OrigBB->getTerminator ()->eraseFromParent ();
499+
500+ IRBuilder<> LoopBuilder (LoopBB);
501+ PHINode *CurrentDst = LoopBuilder.CreatePHI (DstAddr->getType (), 0 );
502+ CurrentDst->addIncoming (DstAddr, OrigBB);
503+ PHINode *LoopCount = LoopBuilder.CreatePHI (TypeOfCopyLen, 0 );
504+ LoopCount->addIncoming (RuntimeLoopCount, OrigBB);
505+
506+ // Create the store instruction for the pattern
507+ LoopBuilder.CreateAlignedStore (SetValue, CurrentDst, DstAlign, IsVolatile);
508+
509+ Value *NextDst = LoopBuilder.CreateInBoundsGEP (
510+ SetValue->getType (), CurrentDst,
511+ ConstantInt::get (TypeOfCopyLen, PatternSize));
512+ CurrentDst->addIncoming (NextDst, LoopBB);
513+
514+ Value *NewLoopCount =
515+ LoopBuilder.CreateSub (LoopCount, ConstantInt::get (TypeOfCopyLen, 1 ));
516+ LoopCount->addIncoming (NewLoopCount, LoopBB);
517+
518+ LoopBuilder.CreateCondBr (
519+ LoopBuilder.CreateICmpNE (NewLoopCount,
520+ ConstantInt::get (TypeOfCopyLen, 0 )),
521+ LoopBB, RemCheckBB);
522+
523+ IRBuilder<> RemCheckBuilder (RemCheckBB, RemCheckBB->begin ());
524+ // Branch to the end if there are no remainder bytes.
525+ PHINode *RemainderDstPHI = RemCheckBuilder.CreatePHI (NextDst->getType (), 0 );
526+ RemainderDstPHI->addIncoming (DstAddr, OrigBB);
527+ RemainderDstPHI->addIncoming (NextDst, LoopBB);
528+ RemCheckBuilder.CreateCondBr (
529+ RemCheckBuilder.CreateICmpEQ (RuntimeRemainder,
530+ ConstantInt::get (TypeOfCopyLen, 0 )),
531+ NewBB, RemainderLoopBB);
532+
533+ // Remainder loop
534+ IRBuilder<> RemainderLoopBuilder (RemainderLoopBB);
535+ PHINode *ByteIndex = RemainderLoopBuilder.CreatePHI (TypeOfCopyLen, 0 );
536+ ByteIndex->addIncoming (ConstantInt::get (TypeOfCopyLen, 0 ), RemCheckBB);
537+ Type *TypeOfSetValue = SetValue->getType ();
538+ PHINode *ShiftedValue = RemainderLoopBuilder.CreatePHI (TypeOfSetValue, 0 );
539+ ShiftedValue->addIncoming (SetValue, RemCheckBB);
540+
541+ Value *ByteToStore = RemainderLoopBuilder.CreateTrunc (
542+ ShiftedValue, RemainderLoopBuilder.getInt8Ty ());
543+
544+ RemainderLoopBuilder.CreateStore (
545+ ByteToStore,
546+ RemainderLoopBuilder.CreateInBoundsGEP (RemainderLoopBuilder.getInt8Ty (),
547+ RemainderDstPHI, ByteIndex),
548+ IsVolatile);
549+
550+ Value *NewByteIndex = RemainderLoopBuilder.CreateAdd (
551+ ByteIndex, ConstantInt::get (TypeOfCopyLen, 1 ));
552+ ByteIndex->addIncoming (NewByteIndex, RemainderLoopBB);
553+ Value *NewShiftedValue = RemainderLoopBuilder.CreateLShr (
554+ ShiftedValue, ConstantInt::get (TypeOfSetValue, 8 ));
555+ ShiftedValue->addIncoming (NewShiftedValue, RemainderLoopBB);
556+
557+ RemainderLoopBuilder.CreateCondBr (
558+ RemainderLoopBuilder.CreateICmpULT (NewByteIndex, RuntimeRemainder),
559+ RemainderLoopBB, NewBB);
560+ }
561+
459562static void createMemSetLoop (Instruction *InsertBefore, Value *DstAddr,
460563 Value *CopyLen, Value *SetValue, Align DstAlign,
461564 bool IsVolatile) {
@@ -591,6 +694,16 @@ bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove,
591694}
592695
593696void llvm::expandMemSetAsLoop (MemSetInst *Memset) {
697+ if (isa<MemSetPatternInst>(Memset)) {
698+ return createMemSetPatternLoop (
699+ /* InsertBefore */ Memset,
700+ /* DstAddr */ Memset->getRawDest (),
701+ /* CopyLen */ Memset->getLength (),
702+ /* SetValue */ Memset->getValue (),
703+ /* Alignment */ Memset->getDestAlign ().valueOrOne (),
704+ Memset->isVolatile ());
705+ }
706+
594707 createMemSetLoop (/* InsertBefore */ Memset,
595708 /* DstAddr */ Memset->getRawDest (),
596709 /* CopyLen */ Memset->getLength (),
0 commit comments