@@ -665,19 +665,22 @@ func (c *Compiler) lowerCurrentOpcode() {
665665 tableBaseAddr := c .loadTableBaseAddr (tableInstancePtr )
666666 addr := builder .AllocateInstruction ().AsIadd (tableBaseAddr , offsetInBytes ).Insert (builder ).Return ()
667667
668- // Prepare the loop and following block.
669- beforeLoop := builder .AllocateBasicBlock ()
670- loopBlk := builder .AllocateBasicBlock ()
671- loopVar := loopBlk .AddParam (builder , ssa .TypeI64 )
672- followingBlk := builder .AllocateBasicBlock ()
673-
674668 // Uses the copy trick for faster filling buffer like memory.fill, but in this case we copy 8 bytes at a time.
669+ // Tables are rarely huge, so ignore the 8KB maximum.
670+ // https://github.com/golang/go/blob/go1.24.0/src/slices/slices.go#L514-L517
671+ //
675672 // buf := memoryInst.Buffer[offset : offset+fillSize]
676673 // buf[0:8] = value
677674 // for i := 8; i < fillSize; i *= 2 { Begin with 8 bytes.
678675 // copy(buf[i:], buf[:i])
679676 // }
680677
678+ // Prepare the loop and following block.
679+ beforeLoop := builder .AllocateBasicBlock ()
680+ loopBlk := builder .AllocateBasicBlock ()
681+ loopVar := loopBlk .AddParam (builder , ssa .TypeI64 )
682+ followingBlk := builder .AllocateBasicBlock ()
683+
681684 // Insert the jump to the beforeLoop block; If the fillSize is zero, then jump to the following block to skip entire logics.
682685 zero := builder .AllocateInstruction ().AsIconst64 (0 ).Insert (builder ).Return ()
683686 ifFillSizeZero := builder .AllocateInstruction ().AsIcmp (fillSizeExt , zero , ssa .IntegerCmpCondEqual ).
@@ -688,32 +691,24 @@ func (c *Compiler) lowerCurrentOpcode() {
688691 // buf[0:8] = value
689692 builder .SetCurrentBlock (beforeLoop )
690693 builder .AllocateInstruction ().AsStore (ssa .OpcodeStore , value , addr , 0 ).Insert (builder )
691- initValue := builder .AllocateInstruction ().AsIconst64 (8 ).Insert (builder ).Return ()
692- c .insertJumpToBlock (c .allocateVarLengthValues (1 , initValue ), loopBlk )
694+ eight := builder .AllocateInstruction ().AsIconst64 (8 ).Insert (builder ).Return ()
695+ c .insertJumpToBlock (c .allocateVarLengthValues (1 , eight ), loopBlk )
693696
694697 builder .SetCurrentBlock (loopBlk )
695698 dstAddr := builder .AllocateInstruction ().AsIadd (addr , loopVar ).Insert (builder ).Return ()
696699
697- // If loopVar*2 > fillSizeInBytes, then count must be fillSizeInBytes-loopVar.
698- var count ssa.Value
699- {
700- loopVarDoubled := builder .AllocateInstruction ().AsIadd (loopVar , loopVar ).Insert (builder ).Return ()
701- loopVarDoubledLargerThanFillSize := builder .
702- AllocateInstruction ().AsIcmp (loopVarDoubled , fillSizeInBytes , ssa .IntegerCmpCondUnsignedGreaterThanOrEqual ).
703- Insert (builder ).Return ()
704- diff := builder .AllocateInstruction ().AsIsub (fillSizeInBytes , loopVar ).Insert (builder ).Return ()
705- count = builder .AllocateInstruction ().AsSelect (loopVarDoubledLargerThanFillSize , diff , loopVar ).Insert (builder ).Return ()
706- }
700+ newLoopVar := builder .AllocateInstruction ().AsIadd (loopVar , loopVar ).Insert (builder ).Return ()
701+ newLoopVarLessThanFillSize := builder .AllocateInstruction ().
702+ AsIcmp (newLoopVar , fillSizeInBytes , ssa .IntegerCmpCondUnsignedLessThan ).Insert (builder ).Return ()
707703
708- c .callMemmove (dstAddr , addr , count )
704+ // On the last iteration, count must be fillSizeInBytes-loopVar.
705+ diff := builder .AllocateInstruction ().AsIsub (fillSizeInBytes , loopVar ).Insert (builder ).Return ()
706+ count := builder .AllocateInstruction ().AsSelect (newLoopVarLessThanFillSize , loopVar , diff ).Insert (builder ).Return ()
709707
710- shiftAmount := builder .AllocateInstruction ().AsIconst64 (1 ).Insert (builder ).Return ()
711- newLoopVar := builder .AllocateInstruction ().AsIshl (loopVar , shiftAmount ).Insert (builder ).Return ()
712- loopVarLessThanFillSize := builder .AllocateInstruction ().
713- AsIcmp (newLoopVar , fillSizeInBytes , ssa .IntegerCmpCondUnsignedLessThan ).Insert (builder ).Return ()
708+ c .callMemmove (dstAddr , addr , count )
714709
715710 builder .AllocateInstruction ().
716- AsBrnz (loopVarLessThanFillSize , c .allocateVarLengthValues (1 , newLoopVar ), loopBlk ).
711+ AsBrnz (newLoopVarLessThanFillSize , c .allocateVarLengthValues (1 , newLoopVar ), loopBlk ).
717712 Insert (builder )
718713
719714 c .insertJumpToBlock (ssa .ValuesNil , followingBlk )
@@ -741,11 +736,15 @@ func (c *Compiler) lowerCurrentOpcode() {
741736 // Calculate the base address:
742737 addr := builder .AllocateInstruction ().AsIadd (c .getMemoryBaseValue (false ), offset ).Insert (builder ).Return ()
743738
744- // Uses the copy trick for faster filling buffer: https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
739+ // Uses the copy trick for faster filling buffer, with a maximum chunk size of 8KB.
740+ // https://github.com/golang/go/blob/go1.24.0/src/bytes/bytes.go#L664-L673
741+ //
745742 // buf := memoryInst.Buffer[offset : offset+fillSize]
746743 // buf[0] = value
747- // for i := 1; i < fillSize; i *= 2 {
748- // copy(buf[i:], buf[:i])
744+ // for i := 1; i < fillSize; {
745+ // chunk := ((i - 1) & 8191) + 1
746+ // copy(buf[i:], buf[:chunk])
747+ // i += chunk
749748 // }
750749
751750 // Prepare the loop and following block.
@@ -764,32 +763,31 @@ func (c *Compiler) lowerCurrentOpcode() {
764763 // buf[0] = value
765764 builder .SetCurrentBlock (beforeLoop )
766765 builder .AllocateInstruction ().AsStore (ssa .OpcodeIstore8 , value , addr , 0 ).Insert (builder )
767- initValue := builder .AllocateInstruction ().AsIconst64 (1 ).Insert (builder ).Return ()
768- c .insertJumpToBlock (c .allocateVarLengthValues (1 , initValue ), loopBlk )
766+ one := builder .AllocateInstruction ().AsIconst64 (1 ).Insert (builder ).Return ()
767+ c .insertJumpToBlock (c .allocateVarLengthValues (1 , one ), loopBlk )
769768
770769 builder .SetCurrentBlock (loopBlk )
771770 dstAddr := builder .AllocateInstruction ().AsIadd (addr , loopVar ).Insert (builder ).Return ()
772771
773- // If loopVar*2 > fillSizeExt, then count must be fillSizeExt-loopVar.
774- var count ssa.Value
775- {
776- loopVarDoubled := builder .AllocateInstruction ().AsIadd (loopVar , loopVar ).Insert (builder ).Return ()
777- loopVarDoubledLargerThanFillSize := builder .
778- AllocateInstruction ().AsIcmp (loopVarDoubled , fillSize , ssa .IntegerCmpCondUnsignedGreaterThanOrEqual ).
779- Insert (builder ).Return ()
780- diff := builder .AllocateInstruction ().AsIsub (fillSize , loopVar ).Insert (builder ).Return ()
781- count = builder .AllocateInstruction ().AsSelect (loopVarDoubledLargerThanFillSize , diff , loopVar ).Insert (builder ).Return ()
782- }
783-
784- c .callMemmove (dstAddr , addr , count )
772+ // chunk := ((i - 1) & 8191) + 1; the mask 8191 (8KB-1) lets chunk double with i until it is capped at 8KB.
773+ mask := builder .AllocateInstruction ().AsIconst64 (8191 ).Insert (builder ).Return ()
774+ tmp1 := builder .AllocateInstruction ().AsIsub (loopVar , one ).Insert (builder ).Return ()
775+ tmp2 := builder .AllocateInstruction ().AsBand (tmp1 , mask ).Insert (builder ).Return ()
776+ chunk := builder .AllocateInstruction ().AsIadd (tmp2 , one ).Insert (builder ).Return ()
785777
786- shiftAmount := builder . AllocateInstruction (). AsIconst64 ( 1 ). Insert ( builder ). Return ()
787- newLoopVar := builder .AllocateInstruction ().AsIshl (loopVar , shiftAmount ).Insert (builder ).Return ()
788- loopVarLessThanFillSize := builder .AllocateInstruction ().
778+ // i += chunk
779+ newLoopVar := builder .AllocateInstruction ().AsIadd (loopVar , chunk ).Insert (builder ).Return ()
780+ newLoopVarLessThanFillSize := builder .AllocateInstruction ().
789781 AsIcmp (newLoopVar , fillSize , ssa .IntegerCmpCondUnsignedLessThan ).Insert (builder ).Return ()
790782
783+ // count = min(chunk, fillSize-loopVar)
784+ diff := builder .AllocateInstruction ().AsIsub (fillSize , loopVar ).Insert (builder ).Return ()
785+ count := builder .AllocateInstruction ().AsSelect (newLoopVarLessThanFillSize , chunk , diff ).Insert (builder ).Return ()
786+
787+ c .callMemmove (dstAddr , addr , count )
788+
791789 builder .AllocateInstruction ().
792- AsBrnz (loopVarLessThanFillSize , c .allocateVarLengthValues (1 , newLoopVar ), loopBlk ).
790+ AsBrnz (newLoopVarLessThanFillSize , c .allocateVarLengthValues (1 , newLoopVar ), loopBlk ).
793791 Insert (builder )
794792
795793 c .insertJumpToBlock (ssa .ValuesNil , followingBlk )
0 commit comments