@@ -356,6 +356,9 @@ func OrderedBinarySearchOffsetByValFactory[T types.OrderedT](vals []T) func(*Vec
356356 return func (vec * Vector ) []int64 {
357357 var sels []int64
358358 rows := MustFixedColNoTypeCheck [T ](vec )
359+ if len (rows ) == 0 || len (vals ) == 0 {
360+ return sels
361+ }
359362 subVals := vals
360363 if len (vals ) >= kMinLenForSubVector {
361364 minVal := rows [0 ]
@@ -367,32 +370,54 @@ func OrderedBinarySearchOffsetByValFactory[T types.OrderedT](vals []T) func(*Vec
367370 return maxVal < vals [i ]
368371 })
369372 subVals = vals [lowerBound :upperBound ]
373+ if len (subVals ) == 0 {
374+ return sels
375+ }
370376 }
371377
372378 if len (subVals ) <= kMaxLenForBinarySearch {
373- offset := 0
374- for i := range subVals {
375- idx := sort .Search (len (rows ), func (idx int ) bool {
376- return rows [idx ] >= subVals [i ]
379+ start := 0
380+ n1 := len (rows )
381+ for i := 0 ; i < len (subVals ); i ++ {
382+ if i > 0 && subVals [i ] == subVals [i - 1 ] {
383+ continue
384+ }
385+ idx := sort .Search (n1 - start , func (idx int ) bool {
386+ return rows [start + idx ] >= subVals [i ]
377387 })
378- if idx < len (rows ) {
379- if rows [idx ] == subVals [i ] {
380- sels = append (sels , int64 (offset + idx ))
381- }
382- offset += idx
383- rows = rows [idx :]
384- } else {
388+ pos := start + idx
389+ if pos >= n1 {
385390 break
386391 }
392+ if rows [pos ] == subVals [i ] {
393+ runEnd := pos + 1
394+ for runEnd < n1 && rows [runEnd ] == subVals [i ] {
395+ runEnd ++
396+ }
397+ for j := pos ; j < runEnd ; j ++ {
398+ sels = append (sels , int64 (j ))
399+ }
400+ start = runEnd
401+ continue
402+ }
403+ start = pos
387404 }
388405 } else {
389406 n1 , n2 := len (rows ), len (subVals )
390407 i1 , i2 := 0 , 0
391408 for i1 < n1 && i2 < n2 {
392409 if rows [i1 ] == subVals [i2 ] {
393- sels = append (sels , int64 (i1 ))
394- i1 ++
395- i2 ++
410+ val := subVals [i2 ]
411+ runStart := i1
412+ for i1 < n1 && rows [i1 ] == val {
413+ i1 ++
414+ }
415+ for j := runStart ; j < i1 ; j ++ {
416+ sels = append (sels , int64 (j ))
417+ }
418+ for i2 < n2 && subVals [i2 ] == val {
419+ i2 ++
420+ }
396421 } else if rows [i1 ] < subVals [i2 ] {
397422 i1 ++
398423 } else {
@@ -409,57 +434,71 @@ func VarlenBinarySearchOffsetByValFactory(vals [][]byte) func(*Vector) []int64 {
409434 return func (vec * Vector ) []int64 {
410435 var sels []int64
411436 n1 := vec .Length ()
412- if n1 == 0 {
437+ if n1 == 0 || len ( vals ) == 0 {
413438 return sels
414439 }
440+ varlenas := MustFixedColNoTypeCheck [types.Varlena ](vec )
441+ area := vec .GetArea ()
442+
415443 subVals := vals
416444 if len (vals ) >= kMinLenForSubVector {
417445 lowerBound := sort .Search (len (vals ), func (i int ) bool {
418- return bytes .Compare (vec . GetBytesAt ( 0 ), vals [i ]) <= 0
446+ return bytes .Compare (varlenas [ 0 ]. GetByteSlice ( area ), vals [i ]) <= 0
419447 })
420448 upperBound := sort .Search (len (vals ), func (i int ) bool {
421- return bytes .Compare (vec . GetBytesAt ( n1 - 1 ), vals [i ]) < 0
449+ return bytes .Compare (varlenas [ n1 - 1 ]. GetByteSlice ( area ), vals [i ]) < 0
422450 })
423451 subVals = vals [lowerBound :upperBound ]
452+ if len (subVals ) == 0 {
453+ return sels
454+ }
424455 }
425456
426457 if len (subVals ) <= kMaxLenForBinarySearch {
427- offset := 0
428- for i := range subVals {
429- idx , found := sort .Find (n1 , func (idx int ) int {
430- return bytes .Compare (subVals [i ], vec .GetBytesAt (offset + idx ))
458+ start := 0
459+ for i := 0 ; i < len (subVals ); i ++ {
460+ if i > 0 && bytes .Equal (subVals [i ], subVals [i - 1 ]) {
461+ continue
462+ }
463+ idx := sort .Search (n1 - start , func (idx int ) bool {
464+ return bytes .Compare (varlenas [start + idx ].GetByteSlice (area ), subVals [i ]) >= 0
431465 })
432- if idx < n1 {
433- if found {
434- sels = append (sels , int64 (offset + idx ))
435- }
436- offset += idx
437- n1 -= idx
438- } else {
466+ pos := start + idx
467+ if pos >= n1 {
439468 break
440469 }
470+ if bytes .Equal (varlenas [pos ].GetByteSlice (area ), subVals [i ]) {
471+ runEnd := pos + 1
472+ for runEnd < n1 && bytes .Equal (varlenas [runEnd ].GetByteSlice (area ), subVals [i ]) {
473+ runEnd ++
474+ }
475+ for j := pos ; j < runEnd ; j ++ {
476+ sels = append (sels , int64 (j ))
477+ }
478+ start = runEnd
479+ continue
480+ }
481+ start = pos
441482 }
442483 } else {
443484 n2 := len (subVals )
444485 i1 , i2 := 0 , 0
445- varlenas := MustFixedColNoTypeCheck [types.Varlena ](vec )
446- s1 := varlenas [0 ].GetByteSlice (vec .GetArea ())
447- for i2 < n2 {
448- ord := bytes .Compare (s1 , subVals [i2 ])
486+ for i1 < n1 && i2 < n2 {
487+ ord := bytes .Compare (varlenas [i1 ].GetByteSlice (area ), subVals [i2 ])
449488 if ord == 0 {
450- sels = append (sels , int64 (i1 ))
451- i1 ++
452- if i1 == n1 {
453- break
489+ val := subVals [i2 ]
490+ runStart := i1
491+ for i1 < n1 && bytes .Equal (varlenas [i1 ].GetByteSlice (area ), val ) {
492+ i1 ++
493+ }
494+ for j := runStart ; j < i1 ; j ++ {
495+ sels = append (sels , int64 (j ))
496+ }
497+ for i2 < n2 && bytes .Equal (subVals [i2 ], val ) {
498+ i2 ++
454499 }
455- i2 ++
456- s1 = varlenas [i1 ].GetByteSlice (vec .GetArea ())
457500 } else if ord < 0 {
458501 i1 ++
459- if i1 == n1 {
460- break
461- }
462- s1 = varlenas [i1 ].GetByteSlice (vec .GetArea ())
463502 } else {
464503 i2 ++
465504 }
@@ -474,6 +513,9 @@ func FixedSizedBinarySearchOffsetByValFactory[T any](vals []T, cmp func(T, T) in
474513 return func (vec * Vector ) []int64 {
475514 var sels []int64
476515 rows := MustFixedColNoTypeCheck [T ](vec )
516+ if len (rows ) == 0 || len (vals ) == 0 {
517+ return sels
518+ }
477519
478520 subVals := vals
479521 if len (vals ) >= kMinLenForSubVector {
@@ -486,33 +528,55 @@ func FixedSizedBinarySearchOffsetByValFactory[T any](vals []T, cmp func(T, T) in
486528 return cmp (maxVal , vals [i ]) < 0
487529 })
488530 subVals = vals [lowerBound :upperBound ]
531+ if len (subVals ) == 0 {
532+ return sels
533+ }
489534 }
490535
491536 if len (subVals ) <= kMaxLenForBinarySearch {
492- offset := 0
493- for i := range subVals {
494- idx , found := sort .Find (len (rows ), func (idx int ) int {
495- return cmp (subVals [i ], rows [idx ])
537+ start := 0
538+ n1 := len (rows )
539+ for i := 0 ; i < len (subVals ); i ++ {
540+ if i > 0 && cmp (subVals [i ], subVals [i - 1 ]) == 0 {
541+ continue
542+ }
543+ idx := sort .Search (n1 - start , func (idx int ) bool {
544+ return cmp (rows [start + idx ], subVals [i ]) >= 0
496545 })
497- if idx < len (rows ) {
498- if found {
499- sels = append (sels , int64 (offset + idx ))
500- }
501- offset += idx
502- rows = rows [idx :]
503- } else {
546+ pos := start + idx
547+ if pos >= n1 {
504548 break
505549 }
550+ if cmp (rows [pos ], subVals [i ]) == 0 {
551+ runEnd := pos + 1
552+ for runEnd < n1 && cmp (rows [runEnd ], subVals [i ]) == 0 {
553+ runEnd ++
554+ }
555+ for j := pos ; j < runEnd ; j ++ {
556+ sels = append (sels , int64 (j ))
557+ }
558+ start = runEnd
559+ continue
560+ }
561+ start = pos
506562 }
507563 } else {
508564 n1 , n2 := len (rows ), len (subVals )
509565 i1 , i2 := 0 , 0
510566 for i1 < n1 && i2 < n2 {
511567 ord := cmp (rows [i1 ], subVals [i2 ])
512568 if ord == 0 {
513- sels = append (sels , int64 (i1 ))
514- i1 ++
515- i2 ++
569+ val := subVals [i2 ]
570+ runStart := i1
571+ for i1 < n1 && cmp (rows [i1 ], val ) == 0 {
572+ i1 ++
573+ }
574+ for j := runStart ; j < i1 ; j ++ {
575+ sels = append (sels , int64 (j ))
576+ }
577+ for i2 < n2 && cmp (subVals [i2 ], val ) == 0 {
578+ i2 ++
579+ }
516580 } else if ord < 0 {
517581 i1 ++
518582 } else {
0 commit comments