Skip to content

Commit 5aa7dee

Browse files
authored
Merge pull request ClickHouse#79802 from rschu1ze/annoy-compat
Remove compatibility hack for Annoy and Usearch indexes
2 parents 57b66b8 + c54550a commit 5aa7dee

9 files changed

104 additions & 142 deletions

src/Processors/QueryPlan/ReadFromMergeTree.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
#include <QueryPipeline/QueryPipelineBuilder.h>
3535
#include <Storages/LazilyReadInfo.h>
3636
#include <Storages/MergeTree/MergeTreeDataSelectExecutor.h>
37-
#include <Storages/MergeTree/MergeTreeIndexLegacyVectorSimilarity.h>
3837
#include <Storages/MergeTree/MergeTreeIndexMinMax.h>
3938
#include <Storages/MergeTree/MergeTreeIndexVectorSimilarity.h>
4039
#include <Storages/MergeTree/MergeTreePrefetchedReadPool.h>
@@ -1774,9 +1773,6 @@ static void buildIndexes(
17741773
if (const auto * vector_similarity_index = typeid_cast<const MergeTreeIndexVectorSimilarity *>(index_helper.get()))
17751774
condition = vector_similarity_index->createIndexCondition(filter_dag.predicate, context, vector_search_parameters);
17761775
#endif
1777-
if (const auto * legacy_vector_similarity_index = typeid_cast<const MergeTreeIndexLegacyVectorSimilarity *>(index_helper.get()))
1778-
condition = legacy_vector_similarity_index->createIndexCondition(filter_dag.predicate, context);
1779-
17801776
if (!condition)
17811777
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown vector search index {}", index_helper->index.name);
17821778
}

src/Storages/MergeTree/MergeTreeIndexLegacyVectorSimilarity.cpp

Lines changed: 0 additions & 40 deletions
This file was deleted.

src/Storages/MergeTree/MergeTreeIndexLegacyVectorSimilarity.h

Lines changed: 0 additions & 25 deletions
This file was deleted.

src/Storages/MergeTree/MergeTreeIndices.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -143,16 +143,6 @@ MergeTreeIndexFactory::MergeTreeIndexFactory()
143143
registerCreator("vector_similarity", vectorSimilarityIndexCreator);
144144
registerValidator("vector_similarity", vectorSimilarityIndexValidator);
145145
#endif
146-
/// ------
147-
/// TODO: remove this block at the end of 2024.
148-
/// Index types 'annoy' and 'usearch' are no longer supported as of June 2024. Their successor is index type 'vector_similarity'.
149-
/// To support loading tables with old indexes during a transition period, register dummy indexes which allow load/attaching but
150-
/// throw an exception when the user attempts to use them.
151-
registerCreator("annoy", legacyVectorSimilarityIndexCreator);
152-
registerValidator("annoy", legacyVectorSimilarityIndexValidator);
153-
registerCreator("usearch", legacyVectorSimilarityIndexCreator);
154-
registerValidator("usearch", legacyVectorSimilarityIndexValidator);
155-
/// ------
156146

157147
registerCreator("gin", ginIndexCreator);
158148
registerValidator("gin", ginIndexValidator);

src/Storages/MergeTree/MergeTreeIndices.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -401,9 +401,6 @@ MergeTreeIndexPtr vectorSimilarityIndexCreator(const IndexDescription & index);
401401
void vectorSimilarityIndexValidator(const IndexDescription & index, bool attach);
402402
#endif
403403

404-
MergeTreeIndexPtr legacyVectorSimilarityIndexCreator(const IndexDescription & index);
405-
void legacyVectorSimilarityIndexValidator(const IndexDescription & index, bool attach);
406-
407404
MergeTreeIndexPtr ginIndexCreator(const IndexDescription & index);
408405
void ginIndexValidator(const IndexDescription & index, bool attach);
409406

tests/queries/0_stateless/02354_vector_search_legacy_index_compatibility.reference

Lines changed: 0 additions & 2 deletions
This file was deleted.

tests/queries/0_stateless/02354_vector_search_legacy_index_compatibility.sql

Lines changed: 0 additions & 43 deletions
This file was deleted.

tests/queries/0_stateless/02354_vector_search_reference_vector_types.reference

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,20 @@
1-
Create tables with vector similarity indexs on Float32 and BFloat16 columns
2-
Run all combinations of vector search queries: column type X reference vector type
1+
Create tables with vector similarity indexs on Float64, Float32 and BFloat16 columns
2+
Run all combinations of vector search queries: column type x reference vector type
33
5
44
5
55
5
66
5
7-
Verify that the index is used for all combinations of vector search queries: column type X reference vector type
7+
5
8+
5
9+
5
10+
5
11+
5
12+
Check that the index is used for all combinations of vector search queries: column type x reference vector type
13+
Description: vector_similarity GRANULARITY 100000000
14+
Description: vector_similarity GRANULARITY 100000000
15+
Description: vector_similarity GRANULARITY 100000000
16+
Description: vector_similarity GRANULARITY 100000000
17+
Description: vector_similarity GRANULARITY 100000000
818
Description: vector_similarity GRANULARITY 100000000
919
Description: vector_similarity GRANULARITY 100000000
1020
Description: vector_similarity GRANULARITY 100000000

tests/queries/0_stateless/02354_vector_search_reference_vector_types.sql

Lines changed: 91 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,53 +5,94 @@
55

66
SET allow_experimental_vector_similarity_index = 1;
77
SET enable_analyzer = 1;
8-
SET parallel_replicas_local_plan=1; -- this setting is randomized, set it explicitly to have local plan for parallel replicas
8+
SET parallel_replicas_local_plan = 1; -- this setting is randomized, set it explicitly to force local plan for parallel replicas
99

10+
DROP TABLE IF EXISTS tab_f64;
1011
DROP TABLE IF EXISTS tab_f32;
1112
DROP TABLE IF EXISTS tab_bf16;
1213

13-
SELECT 'Create tables with vector similarity indexs on Float32 and BFloat16 columns';
14+
SELECT 'Create tables with vector similarity indexs on Float64, Float32 and BFloat16 columns';
15+
16+
CREATE TABLE tab_f64(id Int32, vec Array(Float64), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance', 2)) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 2;
17+
INSERT INTO tab_f64 VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [0.0, 2.0]), (6, [0.0, 2.1]), (7, [0.0, 2.2]), (8, [0.0, 2.3]), (9, [0.0, 2.4]);
1418

1519
CREATE TABLE tab_f32(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance', 2)) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 2;
1620
INSERT INTO tab_f32 VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [0.0, 2.0]), (6, [0.0, 2.1]), (7, [0.0, 2.2]), (8, [0.0, 2.3]), (9, [0.0, 2.4]);
1721

1822
CREATE TABLE tab_bf16(id Int32, vec Array(BFloat16), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance', 2)) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 2;
1923
INSERT INTO tab_bf16 VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [0.0, 2.0]), (6, [0.0, 2.1]), (7, [0.0, 2.2]), (8, [0.0, 2.3]), (9, [0.0, 2.4]);
2024

25+
DROP FUNCTION IF EXISTS constF64;
26+
CREATE FUNCTION constF64 AS () -> [toFloat64(0.0), toFloat64(2.0)];
27+
2128
DROP FUNCTION IF EXISTS constF32;
2229
CREATE FUNCTION constF32 AS () -> [toFloat32(0.0), toFloat32(2.0)];
2330

2431
DROP FUNCTION IF EXISTS constBF16;
2532
CREATE FUNCTION constBF16 AS () -> [toBFloat16(0.0), toBFloat16(2.0)];
2633

27-
SELECT 'Run all combinations of vector search queries: column type X reference vector type';
34+
SELECT 'Run all combinations of vector search queries: column type x reference vector type';
2835

2936
SELECT id
30-
FROM tab_f32
37+
FROM tab_f64
38+
ORDER BY L2Distance(vec, constF64())
39+
LIMIT 1;
40+
41+
SELECT id
42+
FROM tab_f64
3143
ORDER BY L2Distance(vec, constF32())
3244
LIMIT 1;
3345

3446
SELECT id
35-
FROM tab_bf16
47+
FROM tab_f64
3648
ORDER BY L2Distance(vec, constBF16())
3749
LIMIT 1;
3850

3951
SELECT id
4052
FROM tab_f32
41-
ORDER BY L2Distance(vec, (SELECT vec FROM tab_f32 WHERE id = 5)) -- subquery evaluates to const scalar
53+
ORDER BY L2Distance(vec, constF64())
54+
LIMIT 1;
55+
56+
SELECT id
57+
FROM tab_f32
58+
ORDER BY L2Distance(vec, constF32())
59+
LIMIT 1;
60+
61+
SELECT id
62+
FROM tab_f32
63+
ORDER BY L2Distance(vec, constBF16())
64+
LIMIT 1;
65+
66+
SELECT id
67+
FROM tab_bf16
68+
ORDER BY L2Distance(vec, constF64())
4269
LIMIT 1;
4370

4471
SELECT id
4572
FROM tab_bf16
46-
ORDER BY L2Distance(vec, (SELECT vec FROM tab_bf16 WHERE id = 5)) -- subquery evaluates to const scalar
73+
ORDER BY L2Distance(vec, constF32())
74+
LIMIT 1;
75+
76+
SELECT id
77+
FROM tab_bf16
78+
ORDER BY L2Distance(vec, constBF16())
4779
LIMIT 1;
4880

49-
SELECT 'Verify that the index is used for all combinations of vector search queries: column type X reference vector type';
81+
SELECT 'Check that the index is used for all combinations of vector search queries: column type x reference vector type';
5082

5183
SELECT trimLeft(explain) AS explain FROM (
5284
EXPLAIN indexes = 1
5385
SELECT id
54-
FROM tab_f32
86+
FROM tab_f64
87+
ORDER BY L2Distance(vec, constF64())
88+
LIMIT 1
89+
)
90+
WHERE explain LIKE '%vector_similarity%';
91+
92+
SELECT trimLeft(explain) AS explain FROM (
93+
EXPLAIN indexes = 1
94+
SELECT id
95+
FROM tab_f64
5596
ORDER BY L2Distance(vec, constF32())
5697
LIMIT 1
5798
)
@@ -60,7 +101,7 @@ WHERE explain LIKE '%vector_similarity%';
60101
SELECT trimLeft(explain) AS explain FROM (
61102
EXPLAIN indexes = 1
62103
SELECT id
63-
FROM tab_bf16
104+
FROM tab_f64
64105
ORDER BY L2Distance(vec, constBF16())
65106
LIMIT 1
66107
)
@@ -70,7 +111,25 @@ SELECT trimLeft(explain) AS explain FROM (
70111
EXPLAIN indexes = 1
71112
SELECT id
72113
FROM tab_f32
73-
ORDER BY L2Distance(vec, (SELECT vec from tab_f32 WHERE id = 5))
114+
ORDER BY L2Distance(vec, constF64())
115+
LIMIT 1
116+
)
117+
WHERE explain LIKE '%vector_similarity%';
118+
119+
SELECT trimLeft(explain) AS explain FROM (
120+
EXPLAIN indexes = 1
121+
SELECT id
122+
FROM tab_f32
123+
ORDER BY L2Distance(vec, constF32())
124+
LIMIT 1
125+
)
126+
WHERE explain LIKE '%vector_similarity%';
127+
128+
SELECT trimLeft(explain) AS explain FROM (
129+
EXPLAIN indexes = 1
130+
SELECT id
131+
FROM tab_f32
132+
ORDER BY L2Distance(vec, constBF16())
74133
LIMIT 1
75134
)
76135
WHERE explain LIKE '%vector_similarity%';
@@ -79,13 +138,33 @@ SELECT trimLeft(explain) AS explain FROM (
79138
EXPLAIN indexes = 1
80139
SELECT id
81140
FROM tab_bf16
82-
ORDER BY L2Distance(vec, (SELECT vec from tab_bf16 WHERE id = 5))
141+
ORDER BY L2Distance(vec, constF64())
142+
LIMIT 1
143+
)
144+
WHERE explain LIKE '%vector_similarity%';
145+
146+
SELECT trimLeft(explain) AS explain FROM (
147+
EXPLAIN indexes = 1
148+
SELECT id
149+
FROM tab_bf16
150+
ORDER BY L2Distance(vec, constF32())
151+
LIMIT 1
152+
)
153+
WHERE explain LIKE '%vector_similarity%';
154+
155+
SELECT trimLeft(explain) AS explain FROM (
156+
EXPLAIN indexes = 1
157+
SELECT id
158+
FROM tab_bf16
159+
ORDER BY L2Distance(vec, constBF16())
83160
LIMIT 1
84161
)
85162
WHERE explain LIKE '%vector_similarity%';
86163

164+
DROP FUNCTION constF64;
87165
DROP FUNCTION constF32;
88166
DROP FUNCTION constBF16;
89167

168+
DROP TABLE tab_f64;
90169
DROP TABLE tab_f32;
91170
DROP TABLE tab_bf16;

0 commit comments

Comments
 (0)