@@ -369,11 +369,17 @@ type flushable struct {
369369 totDocs uint64
370370}
371371
372- var DefaultNumPersisterWorkers = 4
372+ var DefaultNumPersisterWorkers = 1
373373
374374// maximum size of data that a single worker is allowed to perform the in-memory
375375// merge operation.
376- var DefaultMaxSizeInMemoryMerge = 200 * 1024 * 1024
376+ var DefaultMaxSizeInMemoryMerge = 0
377+
378+ func legacyFlushBehaviour () bool {
379+ // DefaultMaxSizeInMemoryMerge = 0 is a special value to preserve the legacy
380+ // one-shot in-memory merge + flush behaviour.
381+ return DefaultMaxSizeInMemoryMerge == 0 && DefaultNumPersisterWorkers == 1
382+ }
377383
378384// persistSnapshotMaybeMerge examines the snapshot and might merge and
379385// persist the in-memory zap segments if there are enough of them
@@ -390,63 +396,84 @@ func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) (
390396 var numSegsToFlushOut int
391397 var totDocs uint64
392398
393- for i , snapshot := range snapshot .segment {
394- if totSize >= DefaultMaxSizeInMemoryMerge {
395- if len (sbs ) >= DefaultMinSegmentsForInMemoryMerge {
396- numSegsToFlushOut += len (sbs )
397- val := & flushable {
398- segments : make ([]segment.Segment , len (sbs )),
399- drops : make ([]* roaring.Bitmap , len (sbsDrops )),
400- sbIdxs : make ([]int , len (sbsIndexes )),
401- totDocs : totDocs ,
402- }
403- copy (val .segments , sbs )
404- copy (val .drops , sbsDrops )
405- copy (val .sbIdxs , sbsIndexes )
406- flushSet = append (flushSet , val )
407-
408- oldSegIdxs = append (oldSegIdxs , sbsIndexes ... )
409- sbs = sbs [:0 ]
410- sbsDrops = sbsDrops [:0 ]
411- sbsIndexes = sbsIndexes [:0 ]
412- totSize = 0
413- totDocs = 0
399+ // legacy behaviour of merge + flush of all in-memory segments in one-shot
400+ if legacyFlushBehaviour () {
401+ val := & flushable {
402+ segments : make ([]segment.Segment , 0 ),
403+ drops : make ([]* roaring.Bitmap , 0 ),
404+ sbIdxs : make ([]int , 0 ),
405+ totDocs : totDocs ,
406+ }
407+ for i , snapshot := range snapshot .segment {
408+ if _ , ok := snapshot .segment .(segment.PersistedSegment ); ! ok {
409+ val .segments = append (val .segments , snapshot .segment )
410+ val .drops = append (val .drops , snapshot .deleted )
411+ val .sbIdxs = append (val .sbIdxs , i )
412+ oldSegIdxs = append (oldSegIdxs , i )
413+ val .totDocs += snapshot .segment .Count ()
414+ numSegsToFlushOut ++
414415 }
415416 }
416417
417- if len (flushSet ) >= DefaultNumPersisterWorkers {
418- break
419- }
418+ flushSet = append (flushSet , val )
419+ } else {
420+ for i , snapshot := range snapshot .segment {
421+ if totSize >= DefaultMaxSizeInMemoryMerge {
422+ if len (sbs ) >= DefaultMinSegmentsForInMemoryMerge {
423+ numSegsToFlushOut += len (sbs )
424+ val := & flushable {
425+ segments : make ([]segment.Segment , len (sbs )),
426+ drops : make ([]* roaring.Bitmap , len (sbsDrops )),
427+ sbIdxs : make ([]int , len (sbsIndexes )),
428+ totDocs : totDocs ,
429+ }
430+ copy (val .segments , sbs )
431+ copy (val .drops , sbsDrops )
432+ copy (val .sbIdxs , sbsIndexes )
433+ flushSet = append (flushSet , val )
434+
435+ oldSegIdxs = append (oldSegIdxs , sbsIndexes ... )
436+ sbs = sbs [:0 ]
437+ sbsDrops = sbsDrops [:0 ]
438+ sbsIndexes = sbsIndexes [:0 ]
439+ totSize = 0
440+ totDocs = 0
441+ }
442+ }
420443
421- if _ , ok := snapshot .segment .(segment.PersistedSegment ); ! ok {
422- sbs = append (sbs , snapshot .segment )
423- sbsDrops = append (sbsDrops , snapshot .deleted )
424- sbsIndexes = append (sbsIndexes , i )
425- totDocs += snapshot .segment .Count ()
426- totSize += snapshot .segment .Size ()
427- }
428- }
444+ if len (flushSet ) >= DefaultNumPersisterWorkers {
445+ break
446+ }
429447
430- // if there were too few segments just merge them all as part of a single worker
431- if len (flushSet ) < DefaultNumPersisterWorkers {
432- numSegsToFlushOut += len (sbs )
433- val := & flushable {
434- segments : make ([]segment.Segment , len (sbs )),
435- drops : make ([]* roaring.Bitmap , len (sbsDrops )),
436- sbIdxs : make ([]int , len (sbsIndexes )),
437- totDocs : totDocs ,
448+ if _ , ok := snapshot .segment .(segment.PersistedSegment ); ! ok {
449+ sbs = append (sbs , snapshot .segment )
450+ sbsDrops = append (sbsDrops , snapshot .deleted )
451+ sbsIndexes = append (sbsIndexes , i )
452+ totDocs += snapshot .segment .Count ()
453+ totSize += snapshot .segment .Size ()
454+ }
455+ }
456+ // if there were too few segments just merge them all as part of a single worker
457+ if len (flushSet ) < DefaultNumPersisterWorkers {
458+ numSegsToFlushOut += len (sbs )
459+ val := & flushable {
460+ segments : make ([]segment.Segment , len (sbs )),
461+ drops : make ([]* roaring.Bitmap , len (sbsDrops )),
462+ sbIdxs : make ([]int , len (sbsIndexes )),
463+ totDocs : totDocs ,
464+ }
465+ copy (val .segments , sbs )
466+ copy (val .drops , sbsDrops )
467+ copy (val .sbIdxs , sbsIndexes )
468+ flushSet = append (flushSet , val )
469+
470+ oldSegIdxs = append (oldSegIdxs , sbsIndexes ... )
471+ sbs = sbs [:0 ]
472+ sbsDrops = sbsDrops [:0 ]
473+ sbsIndexes = sbsIndexes [:0 ]
474+ totSize = 0
475+ totDocs = 0
438476 }
439- copy (val .segments , sbs )
440- copy (val .drops , sbsDrops )
441- copy (val .sbIdxs , sbsIndexes )
442- flushSet = append (flushSet , val )
443-
444- oldSegIdxs = append (oldSegIdxs , sbsIndexes ... )
445- sbs = sbs [:0 ]
446- sbsDrops = sbsDrops [:0 ]
447- sbsIndexes = sbsIndexes [:0 ]
448- totSize = 0
449- totDocs = 0
450477 }
451478
452479 if numSegsToFlushOut < DefaultMinSegmentsForInMemoryMerge {