Skip to content

Commit da1076b

Browse files
authored
Merge pull request ClickHouse#79870 from rschu1ze/vec_search_limit
Tiny vector search cleanups
2 parents 796df50 + 570906b commit da1076b

File tree

10 files changed

+14
-11
lines changed

10 files changed

+14
-11
lines changed

docs/en/engines/table-engines/mergetree-family/annindexes.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ The default value of the setting 256 works well in the majority of use cases.
179179
Higher setting values mean better accuracy at the cost of slower performance.
180180

181181
If the query can use a vector similarity index, ClickHouse checks that the LIMIT `<N>` provided in SELECT queries is within reasonable bounds.
182-
More specifically, an error is returned if `<N>` is bigger than the value of setting [max_limit_for_ann_queries](../../../operations/settings/settings.md#max_limit_for_ann_queries) with default value 100.
182+
More specifically, the vector similarity index is not used if `<N>` is bigger than the value of setting [max_limit_for_vector_search_queries](../../../operations/settings/settings.md#max_limit_for_vector_search_queries) with default value 1000.
183183
Too large LIMITs can slow down searches and usually indicate a usage error.
184184

185185
To check if a SELECT query uses a vector similarity index, you can prefix the query with `EXPLAIN indexes = 1`.

src/Core/Settings.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6580,7 +6580,7 @@ Allow experimental vector similarity index
65806580
DECLARE(Bool, allow_experimental_codecs, false, R"(
65816581
If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).
65826582
)", EXPERIMENTAL) \
6583-
DECLARE(UInt64, max_limit_for_ann_queries, 1'000'000, R"(
6583+
DECLARE(UInt64, max_limit_for_vector_search_queries, 1'000, R"(
65846584
SELECT queries with LIMIT bigger than this setting cannot use vector similarity indices. Helps to prevent memory overflows in vector similarity indices.
65856585
)", EXPERIMENTAL) \
65866586
DECLARE(UInt64, hnsw_candidate_list_size_for_search, 256, R"(
@@ -6798,6 +6798,7 @@ Experimental tsToGrid aggregate function for Prometheus-like timeseries resampli
67986798
MAKE_OBSOLETE(M, Bool, allow_experimental_annoy_index, false) \
67996799
MAKE_OBSOLETE(M, UInt64, max_threads_for_annoy_index_creation, 4) \
68006800
MAKE_OBSOLETE(M, Int64, annoy_index_search_k_nodes, -1) \
6801+
MAKE_OBSOLETE(M, Int64, max_limit_for_ann_queries, -1) \
68016802
MAKE_OBSOLETE(M, Bool, allow_experimental_usearch_index, false) \
68026803
MAKE_OBSOLETE(M, Bool, optimize_move_functions_out_of_any, false) \
68036804
MAKE_OBSOLETE(M, Bool, allow_experimental_undrop_table_query, true) \

src/Core/SettingsChangesHistory.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
8080
{"output_format_pretty_glue_chunks", "0", "auto", "A new setting to make Pretty formats prettier."},
8181
{"distributed_cache_read_only_from_current_az", true, true, "New setting"},
8282
{"parallel_hash_join_threshold", 0, 100'000, "New setting"},
83+
{"max_limit_for_ann_queries", 1'000, 0, "Obsolete setting"},
84+
{"max_limit_for_vector_search_queries", 1'000, 1'000, "New setting"},
8385
{"min_os_cpu_wait_time_ratio_to_throw", 0, 0, "Setting values were changed and backported to 25.4"},
8486
{"max_os_cpu_wait_time_ratio_to_throw", 0, 0, "Setting values were changed and backported to 25.4"},
8587
{"make_distributed_plan", 0, 0, "New experimental setting."},

src/Processors/QueryPlan/Optimizations/Optimizations.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ struct Optimization
3131
{
3232
struct ExtraSettings
3333
{
34-
size_t max_limit_for_ann_queries;
34+
size_t max_limit_for_vector_search_queries;
3535
size_t use_index_for_in_with_subqueries_max_values;
3636
SizeLimits network_transfer_limits;
3737
};

src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ namespace Setting
4141
extern const SettingsMaxThreads max_threads;
4242
extern const SettingsSeconds lock_acquire_timeout;
4343
extern const SettingsString force_optimize_projection_name;
44-
extern const SettingsUInt64 max_limit_for_ann_queries;
44+
extern const SettingsUInt64 max_limit_for_vector_search_queries;
4545
extern const SettingsUInt64 query_plan_max_optimizations_to_apply;
4646
extern const SettingsBool query_plan_optimize_lazy_materialization;
4747
extern const SettingsUInt64 query_plan_max_limit_for_lazy_materialization;
@@ -106,7 +106,7 @@ QueryPlanOptimizationSettings::QueryPlanOptimizationSettings(
106106
optimize_lazy_materialization = from[Setting::query_plan_optimize_lazy_materialization];
107107
max_limit_for_lazy_materialization = from[Setting::query_plan_max_limit_for_lazy_materialization];
108108

109-
max_limit_for_ann_queries = from[Setting::max_limit_for_ann_queries].value;
109+
max_limit_for_vector_search_queries = from[Setting::max_limit_for_vector_search_queries].value;
110110
query_plan_join_shard_by_pk_ranges = from[Setting::query_plan_join_shard_by_pk_ranges].value;
111111

112112
network_transfer_limits = SizeLimits(from[Setting::max_rows_to_transfer], from[Setting::max_bytes_to_transfer], from[Setting::transfer_overflow_mode]);

src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ struct QueryPlanOptimizationSettings
8787
bool optimize_lazy_materialization = false;
8888
size_t max_limit_for_lazy_materialization = 0;
8989

90-
size_t max_limit_for_ann_queries;
90+
size_t max_limit_for_vector_search_queries;
9191

9292
/// Setting needed for Sets (JOIN -> IN optimization)
9393

src/Processors/QueryPlan/Optimizations/optimizeTree.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ void optimizeTreeFirstPass(const QueryPlanOptimizationSettings & optimization_se
4747

4848

4949
Optimization::ExtraSettings extra_settings = {
50-
optimization_settings.max_limit_for_ann_queries,
50+
optimization_settings.max_limit_for_vector_search_queries,
5151
optimization_settings.use_index_for_in_with_subqueries_max_values,
5252
optimization_settings.network_transfer_limits,
5353
};

src/Processors/QueryPlan/Optimizations/useVectorSearch.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ size_t tryUseVectorSearch(QueryPlan::Node * parent_node, QueryPlan::Nodes & /*no
7979
size_t n = limit_step->getLimitForSorting();
8080

8181
/// Check that the LIMIT specified by the user isn't too big - otherwise the cost of vector search outweighs the benefit.
82-
if (n > settings.max_limit_for_ann_queries)
82+
if (n > settings.max_limit_for_vector_search_queries)
8383
return updated_layers;
8484

8585
/// Not 100% sure but other sort types are likely not what we want

tests/queries/0_stateless/02354_vector_search_queries.reference

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ Expression (Project names)
5959
Description: vector_similarity GRANULARITY 2
6060
Parts: 1/1
6161
Granules: 3/4
62-
-- Setting "max_limit_for_ann_queries"
62+
-- Setting "max_limit_for_vector_search_queries"
6363
Expression (Project names)
6464
LazilyRead (Lazily Read)
6565
Limit (preliminary LIMIT (without OFFSET))

tests/queries/0_stateless/02354_vector_search_queries.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,14 +69,14 @@ FROM tab
6969
ORDER BY cosineDistance(vec, reference_vec)
7070
LIMIT 3;
7171

72-
SELECT '-- Setting "max_limit_for_ann_queries"';
72+
SELECT '-- Setting "max_limit_for_vector_search_queries"';
7373
EXPLAIN indexes=1
7474
WITH [0.0, 2.0] as reference_vec
7575
SELECT id, vec, cosineDistance(vec, reference_vec)
7676
FROM tab
7777
ORDER BY cosineDistance(vec, reference_vec)
7878
LIMIT 3
79-
SETTINGS max_limit_for_ann_queries = 2; -- LIMIT 3 > 2 --> don't use the ann index
79+
SETTINGS max_limit_for_vector_search_queries = 2; -- LIMIT 3 > 2 --> don't use the ann index
8080

8181
DROP TABLE tab;
8282

0 commit comments

Comments
 (0)