Skip to content

Commit 7b97b52

Browse files
committed
updating merge planner to use fileSize in budget calculation
1 parent 833bcfb commit 7b97b52

File tree

2 files changed

+36
-3
lines changed

2 files changed

+36
-3
lines changed

index/scorch/merge.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,14 @@ func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions,
225225
return nil, err
226226
}
227227
}
228+
229+
po, err := s.parsePersisterOptions()
230+
if err != nil {
231+
return nil, err
232+
}
233+
234+
mergePlannerOptions.FloorSegmentFileSize = int64(po.MaxSizeInMemoryMerge)
235+
228236
return &mergePlannerOptions, nil
229237
}
230238

index/scorch/mergeplan/merge_plan.go

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,10 @@ type MergePlanOptions struct {
9999
// of tiny segments from resulting in a long tail in the index.
100100
FloorSegmentSize int64
101101

102+
// Small segments' file sizes are rounded up to this size to prevent a lot
103+
// of tiny segments from causing a long tail in the index.
104+
FloorSegmentFileSize int64
105+
102106
// Controls how aggressively merges that reclaim more deletions
103107
// are favored. Higher values will more aggressively target
104108
// merges that reclaim deletions, but be careful not to go so high
@@ -126,6 +130,13 @@ func (o *MergePlanOptions) RaiseToFloorSegmentSize(s int64) int64 {
126130
return o.FloorSegmentSize
127131
}
128132

133+
// RaiseToFloorSegmentFileSize returns s when s is greater than
// o.FloorSegmentFileSize, and o.FloorSegmentFileSize otherwise.
func (o *MergePlanOptions) RaiseToFloorSegmentFileSize(s int64) int64 {
134+
if s > o.FloorSegmentFileSize {
135+
return s
136+
}
137+
return o.FloorSegmentFileSize
138+
}
139+
129140
// MaxSegmentSizeLimit represents the maximum size of a segment,
130141
// this limit comes with hit-1 optimisation/max encoding limit uint31.
131142
const MaxSegmentSizeLimit = 1<<31 - 1
@@ -155,6 +166,7 @@ var SingleSegmentMergePlanOptions = MergePlanOptions{
155166
SegmentsPerMergeTask: 10,
156167
FloorSegmentSize: 1 << 30,
157168
ReclaimDeletesWeight: 2.0,
169+
FloorSegmentFileSize: 1 << 40,
158170
}
159171

160172
// -------------------------------------------
@@ -176,12 +188,18 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) {
176188

177189
var eligibles []Segment
178190
var eligiblesLiveSize int64
191+
var eligiblesFileSize int64
192+
var minFileSize int64 = math.MaxInt64
179193

180194
for _, segment := range segments {
181195
if minLiveSize > segment.LiveSize() {
182196
minLiveSize = segment.LiveSize()
183197
}
184198

199+
if minFileSize > segment.FileSize() {
200+
minFileSize = segment.FileSize()
201+
}
202+
185203
isEligible := segment.LiveSize() < o.MaxSegmentSize/2
186204
// An eligible segment (based on #documents) may be too large
187205
// and thus need a stricter check based on the file size.
@@ -195,17 +213,24 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) {
195213
if isEligible {
196214
eligibles = append(eligibles, segment)
197215
eligiblesLiveSize += segment.LiveSize()
216+
eligiblesFileSize += segment.FileSize()
198217
}
199218
}
200219

201-
minLiveSize = o.RaiseToFloorSegmentSize(minLiveSize)
202-
203220
calcBudget := o.CalcBudget
204221
if calcBudget == nil {
205222
calcBudget = CalcBudget
206223
}
207224

208-
budgetNumSegments := calcBudget(eligiblesLiveSize, minLiveSize, o)
225+
var budgetNumSegments int
226+
if o.FloorSegmentFileSize > 0 {
227+
minFileSize = o.RaiseToFloorSegmentFileSize(minFileSize)
228+
budgetNumSegments = calcBudget(eligiblesFileSize, minFileSize, o)
229+
230+
} else {
231+
minLiveSize = o.RaiseToFloorSegmentSize(minLiveSize)
232+
budgetNumSegments = calcBudget(eligiblesLiveSize, minLiveSize, o)
233+
}
209234

210235
scoreSegments := o.ScoreSegments
211236
if scoreSegments == nil {

0 commit comments

Comments
 (0)