Skip to content

Commit a20efc1

Browse files
MB-64883 - Avoid redundant computation of eligible IDs (#2143)
Currently, the filter-eligible document IDs, which are not specific to any one segment, are recomputed redundantly for every segment. This PR pre-computes the eligible document IDs once (per field and kNN request) and reuses them for each segment.
1 parent bd49319 commit a20efc1

File tree

1 file changed

+25
-8
lines changed

1 file changed

+25
-8
lines changed

index/scorch/optimize_knn.go

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,24 @@ func (o *OptimizeVR) Finish() error {
6666
var errors []error
6767

6868
var snapshotGlobalDocNums map[int]*roaring.Bitmap
69+
var eligibleDocIDsMap map[string]map[int]*roaring.Bitmap
70+
fields := make([]string, len(o.vrs))
71+
for field := range o.vrs {
72+
fields = append(fields, field)
73+
}
74+
6975
if o.requiresFiltering {
7076
snapshotGlobalDocNums = o.snapshot.globalDocNums()
77+
eligibleDocIDsMap = make(map[string]map[int]*roaring.Bitmap)
78+
for _, field := range fields {
79+
vrs := o.vrs[field]
80+
eligibleDocIDsMap[field] = make(map[int]*roaring.Bitmap)
81+
for index, vr := range vrs {
82+
if vr.eligibleDocIDs != nil && len(vr.eligibleDocIDs) > 0 {
83+
eligibleDocIDsMap[field][index] = vr.getEligibleDocIDs()
84+
}
85+
}
86+
}
7187
}
7288

7389
defer o.invokeSearcherEndCallback()
@@ -84,7 +100,8 @@ func (o *OptimizeVR) Finish() error {
84100
<-semaphore // Release the semaphore slot
85101
wg.Done()
86102
}()
87-
for field, vrs := range o.vrs {
103+
for _, field := range fields {
104+
vrs := o.vrs[field]
88105
vecIndex, err := segment.InterpretVectorIndex(field,
89106
o.requiresFiltering, origSeg.deleted)
90107
if err != nil {
@@ -97,7 +114,7 @@ func (o *OptimizeVR) Finish() error {
97114
// update the vector index size as a meta value in the segment snapshot
98115
vectorIndexSize := vecIndex.Size()
99116
origSeg.cachedMeta.updateMeta(field, vectorIndexSize)
100-
for _, vr := range vrs {
117+
for vrIdx, vr := range vrs {
101118
var pl segment_api.VecPostingsList
102119
var err error
103120

@@ -106,21 +123,21 @@ func (o *OptimizeVR) Finish() error {
106123

107124
// Only applies to filtered kNN.
108125
if vr.eligibleDocIDs != nil && len(vr.eligibleDocIDs) > 0 {
109-
eligibleVectorInternalIDs := vr.getEligibleDocIDs()
126+
eligibleVectorInternalIDs := eligibleDocIDsMap[field][vrIdx]
127+
eligibleVectorInternalIDsClone := eligibleVectorInternalIDs.Clone()
110128
if snapshotGlobalDocNums != nil {
111129
// Only the eligible documents belonging to this segment
112130
// will get filtered out.
113131
// There is no way to determine which doc belongs to which segment
114-
eligibleVectorInternalIDs.And(snapshotGlobalDocNums[index])
132+
eligibleVectorInternalIDsClone.And(snapshotGlobalDocNums[index])
115133
}
116134

117-
eligibleLocalDocNums := make([]uint64,
118-
eligibleVectorInternalIDs.GetCardinality())
135+
eligibleLocalDocNums := make([]uint64, 0)
119136
// get the (segment-)local document numbers
120-
for i, docNum := range eligibleVectorInternalIDs.ToArray() {
137+
for _, docNum := range eligibleVectorInternalIDsClone.ToArray() {
121138
localDocNum := o.snapshot.localDocNumFromGlobal(index,
122139
uint64(docNum))
123-
eligibleLocalDocNums[i] = localDocNum
140+
eligibleLocalDocNums = append(eligibleLocalDocNums, localDocNum)
124141
}
125142

126143
pl, err = vecIndex.SearchWithFilter(vr.vector, vr.k,

0 commit comments

Comments
 (0)