Skip to content

Commit 64d55c9

Browse files
ncruces authored and alehander92 committed
Optimize/streamline fill operations (wazero#2395)
1 parent ed749f7 commit 64d55c9

File tree

3 files changed

+59
-52
lines changed

3 files changed

+59
-52
lines changed

internal/engine/interpreter/interpreter.go

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1885,12 +1885,17 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance
18851885
if fillSize+offset > uint64(len(memoryInst.Buffer)) {
18861886
panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
18871887
} else if fillSize != 0 {
1888-
// Uses the copy trick for faster filling buffer.
1889-
// https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
1888+
// Uses the copy trick for faster filling the buffer with the value.
1889+
// https://github.com/golang/go/blob/go1.24.0/src/bytes/bytes.go#L664-L673
18901890
buf := memoryInst.Buffer[offset : offset+fillSize]
1891-
buf[0] = value
1892-
for i := 1; i < len(buf); i *= 2 {
1893-
copy(buf[i:], buf[:i])
1891+
if value == 0 {
1892+
clear(buf)
1893+
} else {
1894+
buf[0] = value
1895+
for i := 1; i < len(buf); {
1896+
chunk := min(i, 8192)
1897+
i += copy(buf[i:], buf[:chunk])
1898+
}
18941899
}
18951900
}
18961901
frame.pc++
@@ -1964,7 +1969,7 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance
19641969
panic(wasmruntime.ErrRuntimeInvalidTableAccess)
19651970
} else if num > 0 {
19661971
// Uses the copy trick for faster filling the region with the value.
1967-
// https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
1972+
// https://github.com/golang/go/blob/go1.24.0/src/slices/slices.go#L514-L517
19681973
targetRegion := table.References[offset : offset+num]
19691974
targetRegion[0] = ref
19701975
for i := 1; i < len(targetRegion); i *= 2 {

internal/engine/wazevo/frontend/lower.go

Lines changed: 43 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -665,19 +665,22 @@ func (c *Compiler) lowerCurrentOpcode() {
665665
tableBaseAddr := c.loadTableBaseAddr(tableInstancePtr)
666666
addr := builder.AllocateInstruction().AsIadd(tableBaseAddr, offsetInBytes).Insert(builder).Return()
667667

668-
// Prepare the loop and following block.
669-
beforeLoop := builder.AllocateBasicBlock()
670-
loopBlk := builder.AllocateBasicBlock()
671-
loopVar := loopBlk.AddParam(builder, ssa.TypeI64)
672-
followingBlk := builder.AllocateBasicBlock()
673-
674668
// Uses the copy trick for faster filling buffer like memory.fill, but in this case we copy 8 bytes at a time.
669+
// Tables are rarely huge, so ignore the 8KB maximum.
670+
// https://github.com/golang/go/blob/go1.24.0/src/slices/slices.go#L514-L517
671+
//
675672
// buf := memoryInst.Buffer[offset : offset+fillSize]
676673
// buf[0:8] = value
677674
// for i := 8; i < fillSize; i *= 2 { Begin with 8 bytes.
678675
// copy(buf[i:], buf[:i])
679676
// }
680677

678+
// Prepare the loop and following block.
679+
beforeLoop := builder.AllocateBasicBlock()
680+
loopBlk := builder.AllocateBasicBlock()
681+
loopVar := loopBlk.AddParam(builder, ssa.TypeI64)
682+
followingBlk := builder.AllocateBasicBlock()
683+
681684
// Insert the jump to the beforeLoop block; if fillSize is zero, jump straight to the following block to skip the fill logic entirely.
682685
zero := builder.AllocateInstruction().AsIconst64(0).Insert(builder).Return()
683686
ifFillSizeZero := builder.AllocateInstruction().AsIcmp(fillSizeExt, zero, ssa.IntegerCmpCondEqual).
@@ -688,32 +691,24 @@ func (c *Compiler) lowerCurrentOpcode() {
688691
// buf[0:8] = value
689692
builder.SetCurrentBlock(beforeLoop)
690693
builder.AllocateInstruction().AsStore(ssa.OpcodeStore, value, addr, 0).Insert(builder)
691-
initValue := builder.AllocateInstruction().AsIconst64(8).Insert(builder).Return()
692-
c.insertJumpToBlock(c.allocateVarLengthValues(1, initValue), loopBlk)
694+
eight := builder.AllocateInstruction().AsIconst64(8).Insert(builder).Return()
695+
c.insertJumpToBlock(c.allocateVarLengthValues(1, eight), loopBlk)
693696

694697
builder.SetCurrentBlock(loopBlk)
695698
dstAddr := builder.AllocateInstruction().AsIadd(addr, loopVar).Insert(builder).Return()
696699

697-
// If loopVar*2 > fillSizeInBytes, then count must be fillSizeInBytes-loopVar.
698-
var count ssa.Value
699-
{
700-
loopVarDoubled := builder.AllocateInstruction().AsIadd(loopVar, loopVar).Insert(builder).Return()
701-
loopVarDoubledLargerThanFillSize := builder.
702-
AllocateInstruction().AsIcmp(loopVarDoubled, fillSizeInBytes, ssa.IntegerCmpCondUnsignedGreaterThanOrEqual).
703-
Insert(builder).Return()
704-
diff := builder.AllocateInstruction().AsIsub(fillSizeInBytes, loopVar).Insert(builder).Return()
705-
count = builder.AllocateInstruction().AsSelect(loopVarDoubledLargerThanFillSize, diff, loopVar).Insert(builder).Return()
706-
}
700+
newLoopVar := builder.AllocateInstruction().AsIadd(loopVar, loopVar).Insert(builder).Return()
701+
newLoopVarLessThanFillSize := builder.AllocateInstruction().
702+
AsIcmp(newLoopVar, fillSizeInBytes, ssa.IntegerCmpCondUnsignedLessThan).Insert(builder).Return()
707703

708-
c.callMemmove(dstAddr, addr, count)
704+
// On the last iteration, count must be fillSizeInBytes-loopVar.
705+
diff := builder.AllocateInstruction().AsIsub(fillSizeInBytes, loopVar).Insert(builder).Return()
706+
count := builder.AllocateInstruction().AsSelect(newLoopVarLessThanFillSize, loopVar, diff).Insert(builder).Return()
709707

710-
shiftAmount := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return()
711-
newLoopVar := builder.AllocateInstruction().AsIshl(loopVar, shiftAmount).Insert(builder).Return()
712-
loopVarLessThanFillSize := builder.AllocateInstruction().
713-
AsIcmp(newLoopVar, fillSizeInBytes, ssa.IntegerCmpCondUnsignedLessThan).Insert(builder).Return()
708+
c.callMemmove(dstAddr, addr, count)
714709

715710
builder.AllocateInstruction().
716-
AsBrnz(loopVarLessThanFillSize, c.allocateVarLengthValues(1, newLoopVar), loopBlk).
711+
AsBrnz(newLoopVarLessThanFillSize, c.allocateVarLengthValues(1, newLoopVar), loopBlk).
717712
Insert(builder)
718713

719714
c.insertJumpToBlock(ssa.ValuesNil, followingBlk)
@@ -741,11 +736,15 @@ func (c *Compiler) lowerCurrentOpcode() {
741736
// Calculate the base address:
742737
addr := builder.AllocateInstruction().AsIadd(c.getMemoryBaseValue(false), offset).Insert(builder).Return()
743738

744-
// Uses the copy trick for faster filling buffer: https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
739+
// Uses the copy trick for faster filling buffer, with a maximum chunk size of 8KB.
740+
// https://github.com/golang/go/blob/go1.24.0/src/bytes/bytes.go#L664-L673
741+
//
745742
// buf := memoryInst.Buffer[offset : offset+fillSize]
746743
// buf[0] = value
747-
// for i := 1; i < fillSize; i *= 2 {
748-
// copy(buf[i:], buf[:i])
744+
// for i := 1; i < fillSize; {
745+
// chunk := ((i - 1) & 8191) + 1
746+
// copy(buf[i:], buf[:chunk])
747+
// i += chunk
749748
// }
750749

751750
// Prepare the loop and following block.
@@ -764,32 +763,31 @@ func (c *Compiler) lowerCurrentOpcode() {
764763
// buf[0] = value
765764
builder.SetCurrentBlock(beforeLoop)
766765
builder.AllocateInstruction().AsStore(ssa.OpcodeIstore8, value, addr, 0).Insert(builder)
767-
initValue := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return()
768-
c.insertJumpToBlock(c.allocateVarLengthValues(1, initValue), loopBlk)
766+
one := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return()
767+
c.insertJumpToBlock(c.allocateVarLengthValues(1, one), loopBlk)
769768

770769
builder.SetCurrentBlock(loopBlk)
771770
dstAddr := builder.AllocateInstruction().AsIadd(addr, loopVar).Insert(builder).Return()
772771

773-
// If loopVar*2 > fillSizeExt, then count must be fillSizeExt-loopVar.
774-
var count ssa.Value
775-
{
776-
loopVarDoubled := builder.AllocateInstruction().AsIadd(loopVar, loopVar).Insert(builder).Return()
777-
loopVarDoubledLargerThanFillSize := builder.
778-
AllocateInstruction().AsIcmp(loopVarDoubled, fillSize, ssa.IntegerCmpCondUnsignedGreaterThanOrEqual).
779-
Insert(builder).Return()
780-
diff := builder.AllocateInstruction().AsIsub(fillSize, loopVar).Insert(builder).Return()
781-
count = builder.AllocateInstruction().AsSelect(loopVarDoubledLargerThanFillSize, diff, loopVar).Insert(builder).Return()
782-
}
783-
784-
c.callMemmove(dstAddr, addr, count)
772+
// chunk := ((i - 1) & 8191) + 1
773+
mask := builder.AllocateInstruction().AsIconst64(8191).Insert(builder).Return()
774+
tmp1 := builder.AllocateInstruction().AsIsub(loopVar, one).Insert(builder).Return()
775+
tmp2 := builder.AllocateInstruction().AsBand(tmp1, mask).Insert(builder).Return()
776+
chunk := builder.AllocateInstruction().AsIadd(tmp2, one).Insert(builder).Return()
785777

786-
shiftAmount := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return()
787-
newLoopVar := builder.AllocateInstruction().AsIshl(loopVar, shiftAmount).Insert(builder).Return()
788-
loopVarLessThanFillSize := builder.AllocateInstruction().
778+
// i += chunk
779+
newLoopVar := builder.AllocateInstruction().AsIadd(loopVar, chunk).Insert(builder).Return()
780+
newLoopVarLessThanFillSize := builder.AllocateInstruction().
789781
AsIcmp(newLoopVar, fillSize, ssa.IntegerCmpCondUnsignedLessThan).Insert(builder).Return()
790782

783+
// count = min(chunk, fillSize-loopVar)
784+
diff := builder.AllocateInstruction().AsIsub(fillSize, loopVar).Insert(builder).Return()
785+
count := builder.AllocateInstruction().AsSelect(newLoopVarLessThanFillSize, chunk, diff).Insert(builder).Return()
786+
787+
c.callMemmove(dstAddr, addr, count)
788+
791789
builder.AllocateInstruction().
792-
AsBrnz(loopVarLessThanFillSize, c.allocateVarLengthValues(1, newLoopVar), loopBlk).
790+
AsBrnz(newLoopVarLessThanFillSize, c.allocateVarLengthValues(1, newLoopVar), loopBlk).
793791
Insert(builder)
794792

795793
c.insertJumpToBlock(ssa.ValuesNil, followingBlk)

internal/wasm/table.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,10 +326,14 @@ func (t *TableInstance) Grow(delta uint32, initialRef Reference) (currentLen uin
326326
newLen >= math.MaxUint32 || (t.Max != nil && newLen > int64(*t.Max)) {
327327
return 0xffffffff // = -1 in signed 32-bit integer.
328328
}
329+
329330
t.References = append(t.References, make([]uintptr, delta)...)
331+
if initialRef == 0 {
332+
return
333+
}
330334

331335
// Uses the copy trick for faster filling the new region with the initial value.
332-
// https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
336+
// https://github.com/golang/go/blob/go1.24.0/src/slices/slices.go#L514-L517
333337
newRegion := t.References[currentLen:]
334338
newRegion[0] = initialRef
335339
for i := 1; i < len(newRegion); i *= 2 {

0 commit comments

Comments
 (0)