Skip to content

Commit 7a67ffd

Browse files
authored
fix block filter (#23237)
Fix block filter for EQ, IN, ... Approved by: @XuPeng-SH, @aunjgr
1 parent 708c7d3 commit 7a67ffd

File tree

2 files changed

+426
-56
lines changed

2 files changed

+426
-56
lines changed

pkg/container/vector/search.go

Lines changed: 120 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,9 @@ func OrderedBinarySearchOffsetByValFactory[T types.OrderedT](vals []T) func(*Vec
356356
return func(vec *Vector) []int64 {
357357
var sels []int64
358358
rows := MustFixedColNoTypeCheck[T](vec)
359+
if len(rows) == 0 || len(vals) == 0 {
360+
return sels
361+
}
359362
subVals := vals
360363
if len(vals) >= kMinLenForSubVector {
361364
minVal := rows[0]
@@ -367,32 +370,54 @@ func OrderedBinarySearchOffsetByValFactory[T types.OrderedT](vals []T) func(*Vec
367370
return maxVal < vals[i]
368371
})
369372
subVals = vals[lowerBound:upperBound]
373+
if len(subVals) == 0 {
374+
return sels
375+
}
370376
}
371377

372378
if len(subVals) <= kMaxLenForBinarySearch {
373-
offset := 0
374-
for i := range subVals {
375-
idx := sort.Search(len(rows), func(idx int) bool {
376-
return rows[idx] >= subVals[i]
379+
start := 0
380+
n1 := len(rows)
381+
for i := 0; i < len(subVals); i++ {
382+
if i > 0 && subVals[i] == subVals[i-1] {
383+
continue
384+
}
385+
idx := sort.Search(n1-start, func(idx int) bool {
386+
return rows[start+idx] >= subVals[i]
377387
})
378-
if idx < len(rows) {
379-
if rows[idx] == subVals[i] {
380-
sels = append(sels, int64(offset+idx))
381-
}
382-
offset += idx
383-
rows = rows[idx:]
384-
} else {
388+
pos := start + idx
389+
if pos >= n1 {
385390
break
386391
}
392+
if rows[pos] == subVals[i] {
393+
runEnd := pos + 1
394+
for runEnd < n1 && rows[runEnd] == subVals[i] {
395+
runEnd++
396+
}
397+
for j := pos; j < runEnd; j++ {
398+
sels = append(sels, int64(j))
399+
}
400+
start = runEnd
401+
continue
402+
}
403+
start = pos
387404
}
388405
} else {
389406
n1, n2 := len(rows), len(subVals)
390407
i1, i2 := 0, 0
391408
for i1 < n1 && i2 < n2 {
392409
if rows[i1] == subVals[i2] {
393-
sels = append(sels, int64(i1))
394-
i1++
395-
i2++
410+
val := subVals[i2]
411+
runStart := i1
412+
for i1 < n1 && rows[i1] == val {
413+
i1++
414+
}
415+
for j := runStart; j < i1; j++ {
416+
sels = append(sels, int64(j))
417+
}
418+
for i2 < n2 && subVals[i2] == val {
419+
i2++
420+
}
396421
} else if rows[i1] < subVals[i2] {
397422
i1++
398423
} else {
@@ -409,57 +434,71 @@ func VarlenBinarySearchOffsetByValFactory(vals [][]byte) func(*Vector) []int64 {
409434
return func(vec *Vector) []int64 {
410435
var sels []int64
411436
n1 := vec.Length()
412-
if n1 == 0 {
437+
if n1 == 0 || len(vals) == 0 {
413438
return sels
414439
}
440+
varlenas := MustFixedColNoTypeCheck[types.Varlena](vec)
441+
area := vec.GetArea()
442+
415443
subVals := vals
416444
if len(vals) >= kMinLenForSubVector {
417445
lowerBound := sort.Search(len(vals), func(i int) bool {
418-
return bytes.Compare(vec.GetBytesAt(0), vals[i]) <= 0
446+
return bytes.Compare(varlenas[0].GetByteSlice(area), vals[i]) <= 0
419447
})
420448
upperBound := sort.Search(len(vals), func(i int) bool {
421-
return bytes.Compare(vec.GetBytesAt(n1-1), vals[i]) < 0
449+
return bytes.Compare(varlenas[n1-1].GetByteSlice(area), vals[i]) < 0
422450
})
423451
subVals = vals[lowerBound:upperBound]
452+
if len(subVals) == 0 {
453+
return sels
454+
}
424455
}
425456

426457
if len(subVals) <= kMaxLenForBinarySearch {
427-
offset := 0
428-
for i := range subVals {
429-
idx, found := sort.Find(n1, func(idx int) int {
430-
return bytes.Compare(subVals[i], vec.GetBytesAt(offset+idx))
458+
start := 0
459+
for i := 0; i < len(subVals); i++ {
460+
if i > 0 && bytes.Equal(subVals[i], subVals[i-1]) {
461+
continue
462+
}
463+
idx := sort.Search(n1-start, func(idx int) bool {
464+
return bytes.Compare(varlenas[start+idx].GetByteSlice(area), subVals[i]) >= 0
431465
})
432-
if idx < n1 {
433-
if found {
434-
sels = append(sels, int64(offset+idx))
435-
}
436-
offset += idx
437-
n1 -= idx
438-
} else {
466+
pos := start + idx
467+
if pos >= n1 {
439468
break
440469
}
470+
if bytes.Equal(varlenas[pos].GetByteSlice(area), subVals[i]) {
471+
runEnd := pos + 1
472+
for runEnd < n1 && bytes.Equal(varlenas[runEnd].GetByteSlice(area), subVals[i]) {
473+
runEnd++
474+
}
475+
for j := pos; j < runEnd; j++ {
476+
sels = append(sels, int64(j))
477+
}
478+
start = runEnd
479+
continue
480+
}
481+
start = pos
441482
}
442483
} else {
443484
n2 := len(subVals)
444485
i1, i2 := 0, 0
445-
varlenas := MustFixedColNoTypeCheck[types.Varlena](vec)
446-
s1 := varlenas[0].GetByteSlice(vec.GetArea())
447-
for i2 < n2 {
448-
ord := bytes.Compare(s1, subVals[i2])
486+
for i1 < n1 && i2 < n2 {
487+
ord := bytes.Compare(varlenas[i1].GetByteSlice(area), subVals[i2])
449488
if ord == 0 {
450-
sels = append(sels, int64(i1))
451-
i1++
452-
if i1 == n1 {
453-
break
489+
val := subVals[i2]
490+
runStart := i1
491+
for i1 < n1 && bytes.Equal(varlenas[i1].GetByteSlice(area), val) {
492+
i1++
493+
}
494+
for j := runStart; j < i1; j++ {
495+
sels = append(sels, int64(j))
496+
}
497+
for i2 < n2 && bytes.Equal(subVals[i2], val) {
498+
i2++
454499
}
455-
i2++
456-
s1 = varlenas[i1].GetByteSlice(vec.GetArea())
457500
} else if ord < 0 {
458501
i1++
459-
if i1 == n1 {
460-
break
461-
}
462-
s1 = varlenas[i1].GetByteSlice(vec.GetArea())
463502
} else {
464503
i2++
465504
}
@@ -474,6 +513,9 @@ func FixedSizedBinarySearchOffsetByValFactory[T any](vals []T, cmp func(T, T) in
474513
return func(vec *Vector) []int64 {
475514
var sels []int64
476515
rows := MustFixedColNoTypeCheck[T](vec)
516+
if len(rows) == 0 || len(vals) == 0 {
517+
return sels
518+
}
477519

478520
subVals := vals
479521
if len(vals) >= kMinLenForSubVector {
@@ -486,33 +528,55 @@ func FixedSizedBinarySearchOffsetByValFactory[T any](vals []T, cmp func(T, T) in
486528
return cmp(maxVal, vals[i]) < 0
487529
})
488530
subVals = vals[lowerBound:upperBound]
531+
if len(subVals) == 0 {
532+
return sels
533+
}
489534
}
490535

491536
if len(subVals) <= kMaxLenForBinarySearch {
492-
offset := 0
493-
for i := range subVals {
494-
idx, found := sort.Find(len(rows), func(idx int) int {
495-
return cmp(subVals[i], rows[idx])
537+
start := 0
538+
n1 := len(rows)
539+
for i := 0; i < len(subVals); i++ {
540+
if i > 0 && cmp(subVals[i], subVals[i-1]) == 0 {
541+
continue
542+
}
543+
idx := sort.Search(n1-start, func(idx int) bool {
544+
return cmp(rows[start+idx], subVals[i]) >= 0
496545
})
497-
if idx < len(rows) {
498-
if found {
499-
sels = append(sels, int64(offset+idx))
500-
}
501-
offset += idx
502-
rows = rows[idx:]
503-
} else {
546+
pos := start + idx
547+
if pos >= n1 {
504548
break
505549
}
550+
if cmp(rows[pos], subVals[i]) == 0 {
551+
runEnd := pos + 1
552+
for runEnd < n1 && cmp(rows[runEnd], subVals[i]) == 0 {
553+
runEnd++
554+
}
555+
for j := pos; j < runEnd; j++ {
556+
sels = append(sels, int64(j))
557+
}
558+
start = runEnd
559+
continue
560+
}
561+
start = pos
506562
}
507563
} else {
508564
n1, n2 := len(rows), len(subVals)
509565
i1, i2 := 0, 0
510566
for i1 < n1 && i2 < n2 {
511567
ord := cmp(rows[i1], subVals[i2])
512568
if ord == 0 {
513-
sels = append(sels, int64(i1))
514-
i1++
515-
i2++
569+
val := subVals[i2]
570+
runStart := i1
571+
for i1 < n1 && cmp(rows[i1], val) == 0 {
572+
i1++
573+
}
574+
for j := runStart; j < i1; j++ {
575+
sels = append(sels, int64(j))
576+
}
577+
for i2 < n2 && cmp(subVals[i2], val) == 0 {
578+
i2++
579+
}
516580
} else if ord < 0 {
517581
i1++
518582
} else {

0 commit comments

Comments
 (0)