44 "errors"
55 "math"
66 "math/bits"
7+ "unsafe"
78)
89
910type Unsigned interface {
@@ -20,26 +21,57 @@ type BinaryFuse[T Unsigned] struct {
2021 Fingerprints []T
2122}
2223
23- // NewBinaryFuse fills the filter with provided keys. For best results,
24- // the caller should avoid having too many duplicated keys.
24+ // NewBinaryFuse creates a binary fuse filter with provided keys. For best
25+ // results, the caller should avoid having too many duplicated keys.
26+ //
27+ // The function can mutate the given keys slice to remove duplicates.
28+ //
2529// The function may return an error if the set is empty.
2630func NewBinaryFuse [T Unsigned ](keys []uint64 ) (* BinaryFuse [T ], error ) {
31+ var b BinaryFuseBuilder
32+ filter , err := BuildBinaryFuse [T ](& b , keys )
33+ if err != nil {
34+ return nil , err
35+ }
36+ return & filter , nil
37+ }
38+
39+ // BinaryFuseBuilder can be used to reuse memory allocations across multiple
40+ // BinaryFuse builds.
41+ type BinaryFuseBuilder struct {
42+ alone reusableBuffer
43+ t2hash reusableBuffer
44+ reverseOrder reusableBuffer
45+ t2count reusableBuffer
46+ reverseH reusableBuffer
47+ startPos reusableBuffer
48+ fingerprints reusableBuffer
49+ }
50+
51+ // BuildBinaryFuse creates a binary fuse filter with provided keys, reusing
52+ // buffers from the BinaryFuseBuilder if possible. For best results, the caller
53+ // should avoid having too many duplicated keys.
54+ //
55+ // The function can mutate the given keys slice to remove duplicates.
56+ //
57+ // The function may return an error if the set is empty.
58+ func BuildBinaryFuse [T Unsigned ](b * BinaryFuseBuilder , keys []uint64 ) (BinaryFuse [T ], error ) {
2759 size := uint32 (len (keys ))
28- filter := & BinaryFuse [T ]{}
29- filter .initializeParameters (size )
60+ var filter BinaryFuse [T ]
61+ filter .initializeParameters (b , size )
3062 rngcounter := uint64 (1 )
3163 filter .Seed = splitmix64 (& rngcounter )
3264 capacity := uint32 (len (filter .Fingerprints ))
3365
34- alone := make ([] uint32 , capacity )
66+ alone := reuseBuffer [ uint32 ]( & b . alone , int ( capacity ) )
3567 // the lowest 2 bits are the h index (0, 1, or 2)
3668 // so we only have 6 bits for counting;
3769 // but that's sufficient
38- t2count := make ([] T , capacity )
39- reverseH := make ([] T , size )
70+ t2count := reuseBuffer [ T ]( & b . t2count , int ( capacity ) )
71+ reverseH := reuseBuffer [ T ]( & b . reverseH , int ( size ) )
4072
41- t2hash := make ([] uint64 , capacity )
42- reverseOrder := make ([] uint64 , size + 1 )
73+ t2hash := reuseBuffer [ uint64 ]( & b . t2hash , int ( capacity ) )
74+ reverseOrder := reuseBuffer [ uint64 ]( & b . reverseOrder , int ( size + 1 ) )
4375 reverseOrder [size ] = 1
4476
4577 // the array h0, h1, h2, h0, h1, h2
@@ -50,16 +82,16 @@ func NewBinaryFuse[T Unsigned](keys []uint64) (*BinaryFuse[T], error) {
5082 for {
5183 iterations += 1
5284 if iterations > MaxIterations {
53- // The probability of this happening is lower than the
54- // the cosmic-ray probability (i.e., a cosmic ray corrupts your system).
55- return nil , errors .New ("too many iterations" )
85+ // The probability of this happening is lower than the cosmic-ray
86+ // probability (i.e., a cosmic ray corrupts your system).
87+ return BinaryFuse [ T ]{} , errors .New ("too many iterations" )
5688 }
5789
5890 blockBits := 1
5991 for (1 << blockBits ) < filter .SegmentCount {
6092 blockBits += 1
6193 }
62- startPos := make ([] uint , 1 << blockBits )
94+ startPos := reuseBuffer [ uint ]( & b . startPos , 1 << blockBits )
6395 for i := range startPos {
6496 // important: we do not want i * size to overflow!!!
6597 startPos [i ] = uint ((uint64 (i ) * uint64 (size )) >> blockBits )
@@ -216,7 +248,7 @@ func NewBinaryFuse[T Unsigned](keys []uint64) (*BinaryFuse[T], error) {
216248 return filter , nil
217249}
218250
219- func (filter * BinaryFuse [T ]) initializeParameters (size uint32 ) {
251+ func (filter * BinaryFuse [T ]) initializeParameters (b * BinaryFuseBuilder , size uint32 ) {
220252 arity := uint32 (3 )
221253 filter .SegmentLength = calculateSegmentLength (arity , size )
222254 if filter .SegmentLength > 262144 {
@@ -238,7 +270,7 @@ func (filter *BinaryFuse[T]) initializeParameters(size uint32) {
238270 }
239271 arrayLength = (filter .SegmentCount + arity - 1 ) * filter .SegmentLength
240272 filter .SegmentCountLength = filter .SegmentCount * filter .SegmentLength
241- filter .Fingerprints = make ([] T , arrayLength )
273+ filter .Fingerprints = reuseBuffer [ T ]( & b . fingerprints , int ( arrayLength ) )
242274}
243275
244276func (filter * BinaryFuse [T ]) mod3 (x T ) T {
@@ -292,3 +324,30 @@ func calculateSizeFactor(arity uint32, size uint32) float64 {
292324 return 2.0
293325 }
294326}
327+
// reusableBuffer allows reuse of a backing buffer to avoid allocations for
// slices of integers.
//
// The backing storage is a []uint64 so that it is suitably aligned for every
// integer element type handed out by reuseBuffer.
type reusableBuffer struct {
	buf []uint64
}

// integer is the set of element types that reuseBuffer can hand out.
type integer interface {
	~int | ~int8 | ~int16 | ~int32 | ~int64 | ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64
}

// reuseBuffer returns a zeroed slice of the given size (both length and
// capacity equal size), reusing the last buffer if possible.
//
// The returned slice aliases b's backing storage: any slice obtained from an
// earlier call on the same reusableBuffer must no longer be used, because its
// memory may be cleared and handed out again here.
func reuseBuffer[T integer](b *reusableBuffer, size int) []T {
	const sizeOfUint64 = 8
	// Our backing buffer is a []uint64. Figure out how many uint64s we need
	// to back a []T of the requested size (rounding up).
	bufSize := int((uintptr(size)*unsafe.Sizeof(T(0)) + sizeOfUint64 - 1) / sizeOfUint64)
	if cap(b.buf) >= bufSize {
		// Reuse: only the words that back the returned slice need zeroing.
		clear(b.buf[:bufSize])
	} else {
		// We need to allocate a new buffer. Increase by at least 25% to amortize
		// allocations; this is what append() does for large enough slices.
		// make returns zeroed memory, so no explicit clear is needed.
		b.buf = make([]uint64, max(bufSize, cap(b.buf)+cap(b.buf)/4))
	}
	// Reinterpret the start of the uint64 storage as a []T of length size.
	return unsafe.Slice((*T)(unsafe.Pointer(unsafe.SliceData(b.buf))), size)
}
0 commit comments