cockroachdb
diff --git a/‎pkg/cmd/vecbench/mem_provider.go‎
Lines changed: 3 additions & 1 deletion b/‎pkg/cmd/vecbench/mem_provider.go‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎pkg/sql/exec_util.go‎
Lines changed: 4 additions & 0 deletions b/‎pkg/sql/exec_util.go‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎pkg/sql/logictest/testdata/logic_test/information_schema‎
Lines changed: 1 addition & 0 deletions b/‎pkg/sql/logictest/testdata/logic_test/information_schema‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎pkg/sql/logictest/testdata/logic_test/pg_catalog‎
Lines changed: 3 additions & 0 deletions b/‎pkg/sql/logictest/testdata/logic_test/pg_catalog‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎pkg/sql/logictest/testdata/logic_test/show_source‎
Lines changed: 1 addition & 0 deletions b/‎pkg/sql/logictest/testdata/logic_test/show_source‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎pkg/sql/logictest/testdata/logic_test/vector_index‎
Lines changed: 23 additions & 3 deletions b/‎pkg/sql/logictest/testdata/logic_test/vector_index‎
Lines changed: 23 additions & 3 deletions
diff --git a/‎pkg/sql/rowexec/vector_search.go‎
Lines changed: 3 additions & 1 deletion b/‎pkg/sql/rowexec/vector_search.go‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎pkg/sql/sessiondatapb/local_only_session_data.proto‎
Lines changed: 5 additions & 0 deletions b/‎pkg/sql/sessiondatapb/local_only_session_data.proto‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎pkg/sql/vars.go‎
Lines changed: 25 additions & 2 deletions b/‎pkg/sql/vars.go‎
Lines changed: 25 additions & 2 deletions
diff --git a/‎pkg/sql/vecindex/cspann/index.go‎
Lines changed: 19 additions & 11 deletions b/‎pkg/sql/vecindex/cspann/index.go‎
Lines changed: 19 additions & 11 deletions
@@ -165,7 +165,9 @@ func (m *MemProvider) Search(
 		// Search the store.
 		var idxCtx cspann.Context
 		idxCtx.Init(txn)
-		searchSet := cspann.SearchSet{MaxResults: memState.maxResults}
+		maxResults, maxExtraResults :=
+			cspann.IncreaseRerankResults(memState.beamSize, memState.maxResults, 50)
+		searchSet := cspann.SearchSet{MaxResults: maxResults, MaxExtraResults: maxExtraResults}
 		searchOptions := cspann.SearchOptions{BaseBeamSize: memState.beamSize}
 		err = m.index.Search(ctx, &idxCtx, nil /* treeKey */, vec, &searchSet, searchOptions)
 		if err != nil {
 
@@ -4212,6 +4212,10 @@ func (m *sessionDataMutator) SetVectorSearchBeamSize(val int32) {
 	m.data.VectorSearchBeamSize = val
 }
 
+func (m *sessionDataMutator) SetVectorSearchRerankMultiplier(val int32) {
+	m.data.VectorSearchRerankMultiplier = val
+}
+
 func (m *sessionDataMutator) SetPropagateAdmissionHeaderToLeafTransactions(val bool) {
 	m.data.PropagateAdmissionHeaderToLeafTransactions = val
 }
 
@@ -4141,6 +4141,7 @@ use_pre_25_2_variadic_builtins                                   off
 use_proc_txn_control_extended_protocol_fix                       on
 variable_inequality_lookup_join_enabled                          on
 vector_search_beam_size                                          32
+vector_search_rerank_multiplier                                  50
 xmloption                                                        content
 
 # information_schema can be used with the anonymous database.
 
@@ -3146,6 +3146,7 @@ use_pre_25_2_variadic_builtins                                   off
 use_proc_txn_control_extended_protocol_fix                       on                  NULL      NULL        NULL        string
 variable_inequality_lookup_join_enabled                          on                  NULL      NULL        NULL        string
 vector_search_beam_size                                          32                  NULL      NULL        NULL        string
+vector_search_rerank_multiplier                                  50                  NULL      NULL        NULL        string
 vectorize                                                        on                  NULL      NULL        NULL        string
 xmloption                                                        content             NULL      NULL        NULL        string
 
@@ -3382,6 +3383,7 @@ use_pre_25_2_variadic_builtins                                   off
 use_proc_txn_control_extended_protocol_fix                       on                  NULL  user     NULL      on                  on
 variable_inequality_lookup_join_enabled                          on                  NULL  user     NULL      on                  on
 vector_search_beam_size                                          32                  NULL  user     NULL      32                  32
+vector_search_rerank_multiplier                                  50                  NULL  user     NULL      50                  50
 vectorize                                                        on                  NULL  user     NULL      on                  on
 xmloption                                                        content             NULL  user     NULL      content             content
 
@@ -3610,6 +3612,7 @@ use_pre_25_2_variadic_builtins                                   NULL    NULL
 use_proc_txn_control_extended_protocol_fix                       NULL    NULL     NULL     NULL        NULL
 variable_inequality_lookup_join_enabled                          NULL    NULL     NULL     NULL        NULL
 vector_search_beam_size                                          NULL    NULL     NULL     NULL        NULL
+vector_search_rerank_multiplier                                  NULL    NULL     NULL     NULL        NULL
 vectorize                                                        NULL    NULL     NULL     NULL        NULL
 xmloption                                                        NULL    NULL     NULL     NULL        NULL
 
 
@@ -250,6 +250,7 @@ use_pre_25_2_variadic_builtins                                   off
 use_proc_txn_control_extended_protocol_fix                       on
 variable_inequality_lookup_join_enabled                          on
 vector_search_beam_size                                          32
+vector_search_rerank_multiplier                                  50
 vectorize                                                        on
 xmloption                                                        content
 
 
@@ -843,11 +843,31 @@ SHOW vector_search_beam_size;
 ----
 8
 
-statement error vector_search_beam_size cannot be less than 1 or greater than 512
+statement error vector_search_beam_size cannot be less than 1 or greater than 2048
 SET vector_search_beam_size=0
 
-statement error vector_search_beam_size cannot be less than 1 or greater than 512
-SET vector_search_beam_size=513
+statement error vector_search_beam_size cannot be less than 1 or greater than 2048
+SET vector_search_beam_size=2049
+
+# Ensure that the vector_search_rerank_multiplier session setting is settable.
+query T
+SHOW vector_search_rerank_multiplier;
+----
+50
+
+statement ok
+SET vector_search_rerank_multiplier=100
+
+query T
+SHOW vector_search_rerank_multiplier;
+----
+100
+
+statement error vector_search_rerank_multiplier cannot be less than 0 or greater than 100
+SET vector_search_rerank_multiplier=-1
+
+statement error vector_search_rerank_multiplier cannot be less than 0 or greater than 100
+SET vector_search_rerank_multiplier=101
 
 subtest end
 
 
@@ -61,7 +61,9 @@ func newVectorSearchProcessor(
 	}
 	searchBeamSize := int(flowCtx.EvalCtx.SessionData().VectorSearchBeamSize)
 	maxResults := int(v.targetCount)
-	v.searcher.Init(flowCtx.EvalCtx, idx, flowCtx.Txn, &spec.GetFullVectorsFetchSpec, searchBeamSize, maxResults)
+	rerankMultiplier := int(flowCtx.EvalCtx.SessionData().VectorSearchRerankMultiplier)
+	v.searcher.Init(flowCtx.EvalCtx,
+		idx, flowCtx.Txn, &spec.GetFullVectorsFetchSpec, searchBeamSize, maxResults, rerankMultiplier)
 	colTypes := make([]*types.T, len(v.fetchSpec.FetchedColumns))
 	for i, col := range v.fetchSpec.FetchedColumns {
 		colTypes[i] = col.Type
 
@@ -694,6 +694,11 @@ message LocalOnlySessionData {
   bool enable_scrub_job = 176;
   // AllowViewWithSecurityInvokerClause indicates whether security invoker for views is enabled
   bool allow_view_with_security_invoker_clause = 177;
+  // VectorSearchRerankMultiplier controls how many of the initial search results
+  // can be reranked using exact distance calculations with the original
+  // full-size vectors. It acts as a multiplier on a base limit derived from the
+  // search beam size and the top-k results requested by the query.
+  int32 vector_search_rerank_multiplier = 178;
 
   ///////////////////////////////////////////////////////////////////////////
   // WARNING: consider whether a session parameter you're adding needs to  //
 
@@ -4047,9 +4047,9 @@ var varGen = map[string]sessionVar{
 			if err != nil {
 				return err
 			}
-			if b < 1 || b > 512 {
+			if b < 1 || b > 2048 {
 				return pgerror.Newf(pgcode.InvalidParameterValue,
-					"vector_search_beam_size cannot be less than 1 or greater than 512")
+					"vector_search_beam_size cannot be less than 1 or greater than 2048")
 			}
 			m.SetVectorSearchBeamSize(int32(b))
 			return nil
@@ -4062,6 +4062,29 @@ var varGen = map[string]sessionVar{
 		},
 	},
 
+	// CockroachDB extension.
+	`vector_search_rerank_multiplier`: {
+		GetStringVal: makeIntGetStringValFn(`vector_search_rerank_multiplier`),
+		Set: func(_ context.Context, m sessionDataMutator, s string) error {
+			b, err := strconv.ParseInt(s, 10, 32)
+			if err != nil {
+				return err
+			}
+			if b < 0 || b > 100 {
+				return pgerror.Newf(pgcode.InvalidParameterValue,
+					"vector_search_rerank_multiplier cannot be less than 0 or greater than 100")
+			}
+			m.SetVectorSearchRerankMultiplier(int32(b))
+			return nil
+		},
+		Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) {
+			return strconv.FormatInt(int64(evalCtx.SessionData().VectorSearchRerankMultiplier), 10), nil
+		},
+		GlobalDefault: func(sv *settings.Values) string {
+			return "50"
+		},
+	},
+
 	// CockroachDB extension.
 	`propagate_admission_header_to_leaf_transactions`: {
 		GetStringVal: makePostgresBoolGetStringValFn(`propagate_admission_header_to_leaf_transactions`),
 
@@ -25,10 +25,6 @@ import (
 	"github.com/cockroachdb/errors"
 )
 
-// RerankMultiplier is multiplied by MaxResults to calculate the maximum number
-// of search results that will be reranked with the original full-size vectors.
-const RerankMultiplier = 10
-
 // DeletedMinCount sets a minimum number of results that will be reranked, in
 // order to account for vectors that may have been deleted in the primary index.
 const DeletedMinCount = 10
@@ -41,18 +37,30 @@ const DeletedMultiplier = 1.2
 // MaxQualitySamples specifies the max value of the QualitySamples index option.
 const MaxQualitySamples = 32
 
-// IncreaseRerankResults returns good values for maxResults and maxExtraResults
-// that have a high probability of returning the desired number of results, even
-// when there are deleted results. Deleted results will be filtered out by the
-// rerank process, so we need to make sure there are additional results that can
-// be returned instead.
+// IncreaseRerankResults returns good values for maxResults and maxExtraResults.
+// Deleted results will be filtered out of the final results, so we need to make
+// sure there are additional results that can be returned instead. In addition,
+// quantization error can reduce the accuracy of results, so we need to return
+// extra results that can be reranked by exact distance calculations. Both the
+// search beam size and the top-k limit of results empirically have a
+// logarithmic relationship to the number of vectors that need to be reranked,
+// so use this formula to set a bound to the number of extra results:
+//
+// maxExtraResults =
+// log2(searchBeamSize) * log2(desiredMaxResults) * rerankMultiplier
+//
+// The rerank multiplier is a session setting that can be used to set a tighter
+// or looser bound.
 //
 // TODO(andyk): Switch the index to use a search iterator so the caller can keep
 // requesting further results rather than guessing at how many additional
 // results might be needed.
-func IncreaseRerankResults(desiredMaxResults int) (maxResults, maxExtraResults int) {
+func IncreaseRerankResults(
+	searchBeamSize, desiredMaxResults, rerankMultiplier int,
+) (maxResults, maxExtraResults int) {
 	maxResults = max(int(math.Ceil(float64(desiredMaxResults)*DeletedMultiplier)), DeletedMinCount)
-	maxExtraResults = desiredMaxResults * RerankMultiplier
+	log := math.Log2(float64(max(searchBeamSize, 2))) * math.Log2(float64(max(desiredMaxResults, 2)))
+	maxExtraResults = int(log) * rerankMultiplier
 	return maxResults, maxExtraResults
 }
Original file line number	Diff line number	Diff line change
`@@ -4212,6 +4212,10 @@ func (m *sessionDataMutator) SetVectorSearchBeamSize(val int32) {`
`4212`	`4212`	`m.data.VectorSearchBeamSize = val`
`4213`	`4213`	`}`
`4214`	`4214`
	`4215`	`+func (m *sessionDataMutator) SetVectorSearchRerankMultiplier(val int32) {`
	`4216`	`+ m.data.VectorSearchRerankMultiplier = val`
	`4217`	`+}`
	`4218`	`+`
`4215`	`4219`	`func (m *sessionDataMutator) SetPropagateAdmissionHeaderToLeafTransactions(val bool) {`
`4216`	`4220`	`m.data.PropagateAdmissionHeaderToLeafTransactions = val`
`4217`	`4221`	`}`