Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 791fad1

Browse files
xfalcox and romanrizzi authored
FEATURE: Index embeddings using bit vectors (#824)
On very large sites, the rare cache misses for Related Topics can take around 200ms, which affects our p99 metric on the topic page. To mitigate this impact, we now have several tools at our disposal. The first is to migrate the index embedding type from halfvec to bit and change the related topic query to leverage the new bit index by changing the search algorithm from inner product to Hamming distance. This will reduce our index sizes by 90%, severely reducing the impact of embeddings on our storage. By making the related query a bit smarter, we can have zero impact on recall by using the index to over-capture N*2 results, then re-ordering those N*2 using the full halfvec vectors and taking the top N. The expected impact is to go from 200ms to <20ms for cache misses and from a 2.5GB index to a 250MB index on a large site. Another tool is migrating our index type from IVFFLAT to HNSW, which can improve cache-miss performance even further, eventually putting us in under-5ms territory. Co-authored-by: Roman Rizzi <[email protected]>
1 parent 6615104 commit 791fad1

File tree

6 files changed

+147
-168
lines changed

6 files changed

+147
-168
lines changed

app/jobs/scheduled/embeddings_backfill.rb

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,6 @@ def execute(args)
3535

3636
rebaked += populate_topic_embeddings(vector_rep, topics)
3737

38-
vector_rep.consider_indexing
39-
4038
return if rebaked >= limit
4139

4240
# Then, we'll try to backfill embeddings for topics that have outdated
@@ -82,8 +80,6 @@ def execute(args)
8280
rebaked += 1
8381
end
8482

85-
vector_rep.consider_indexing
86-
8783
return if rebaked >= limit
8884

8985
# Then, we'll try to backfill embeddings for posts that have outdated

db/migrate/20240611170905_move_embeddings_to_single_table_per_type.rb

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,6 @@ def up
150150
strategy = DiscourseAi::Embeddings::Strategies::Truncation.new
151151
vector_rep =
152152
DiscourseAi::Embeddings::VectorRepresentations::Base.current_representation(strategy)
153-
vector_rep.consider_indexing
154153
rescue StandardError => e
155154
Rails.logger.error("Failed to index embeddings: #{e}")
156155
end
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# frozen_string_literal: true
2+
3+
class CreateBinaryIndexesForEmbeddings < ActiveRecord::Migration[7.1]
4+
def up
5+
%w[topic post document_fragment].each do |type|
6+
# our supported embeddings models IDs and dimensions
7+
[
8+
[1, 768],
9+
[2, 1536],
10+
[3, 1024],
11+
[4, 1024],
12+
[5, 768],
13+
[6, 1536],
14+
[7, 2000],
15+
[8, 1024],
16+
].each { |model_id, dimensions| execute <<-SQL }
17+
CREATE INDEX ai_#{type}_embeddings_#{model_id}_1_search_bit ON ai_#{type}_embeddings
18+
USING hnsw ((binary_quantize(embeddings)::bit(#{dimensions})) bit_hamming_ops)
19+
WHERE model_id = #{model_id} AND strategy_id = 1;
20+
SQL
21+
end
22+
end
23+
24+
def down
25+
raise ActiveRecord::IrreversibleMigration
26+
end
27+
end
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# frozen_string_literal: true
2+
class DropOldEmbeddingsIndexes < ActiveRecord::Migration[7.1]
3+
def up
4+
execute <<~SQL
5+
DROP INDEX IF EXISTS ai_topic_embeddings_1_1_search;
6+
DROP INDEX IF EXISTS ai_topic_embeddings_2_1_search;
7+
DROP INDEX IF EXISTS ai_topic_embeddings_3_1_search;
8+
DROP INDEX IF EXISTS ai_topic_embeddings_4_1_search;
9+
DROP INDEX IF EXISTS ai_topic_embeddings_5_1_search;
10+
DROP INDEX IF EXISTS ai_topic_embeddings_6_1_search;
11+
DROP INDEX IF EXISTS ai_topic_embeddings_7_1_search;
12+
DROP INDEX IF EXISTS ai_topic_embeddings_8_1_search;
13+
14+
DROP INDEX IF EXISTS ai_post_embeddings_1_1_search;
15+
DROP INDEX IF EXISTS ai_post_embeddings_2_1_search;
16+
DROP INDEX IF EXISTS ai_post_embeddings_3_1_search;
17+
DROP INDEX IF EXISTS ai_post_embeddings_4_1_search;
18+
DROP INDEX IF EXISTS ai_post_embeddings_5_1_search;
19+
DROP INDEX IF EXISTS ai_post_embeddings_6_1_search;
20+
DROP INDEX IF EXISTS ai_post_embeddings_7_1_search;
21+
DROP INDEX IF EXISTS ai_post_embeddings_8_1_search;
22+
23+
DROP INDEX IF EXISTS ai_document_fragment_embeddings_1_1_search;
24+
DROP INDEX IF EXISTS ai_document_fragment_embeddings_2_1_search;
25+
DROP INDEX IF EXISTS ai_document_fragment_embeddings_3_1_search;
26+
DROP INDEX IF EXISTS ai_document_fragment_embeddings_4_1_search;
27+
DROP INDEX IF EXISTS ai_document_fragment_embeddings_5_1_search;
28+
DROP INDEX IF EXISTS ai_document_fragment_embeddings_6_1_search;
29+
DROP INDEX IF EXISTS ai_document_fragment_embeddings_7_1_search;
30+
DROP INDEX IF EXISTS ai_document_fragment_embeddings_8_1_search;
31+
SQL
32+
end
33+
34+
def down
35+
raise ActiveRecord::IrreversibleMigration
36+
end
37+
end

lib/embeddings/vector_representations/base.rb

Lines changed: 83 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -46,113 +46,6 @@ def initialize(strategy)
4646
@strategy = strategy
4747
end
4848

49-
def consider_indexing(memory: "100MB")
50-
[topic_table_name, post_table_name].each do |table_name|
51-
index_name = index_name(table_name)
52-
# Using extension maintainer's recommendation for ivfflat indexes
53-
# Results are not as good as without indexes, but it's much faster
54-
# Disk usage is ~1x the size of the table, so this doubles table total size
55-
count =
56-
DB.query_single(
57-
"SELECT count(*) FROM #{table_name} WHERE model_id = #{id} AND strategy_id = #{@strategy.id};",
58-
).first
59-
lists = [count < 1_000_000 ? count / 1000 : Math.sqrt(count).to_i, 10].max
60-
probes = [count < 1_000_000 ? lists / 10 : Math.sqrt(lists).to_i, 1].max
61-
Discourse.cache.write("#{table_name}-#{id}-#{@strategy.id}-probes", probes)
62-
63-
existing_index = DB.query_single(<<~SQL, index_name: index_name).first
64-
SELECT
65-
indexdef
66-
FROM
67-
pg_indexes
68-
WHERE
69-
indexname = :index_name
70-
AND schemaname = 'public'
71-
LIMIT 1
72-
SQL
73-
74-
if !existing_index.present?
75-
Rails.logger.info("Index #{index_name} does not exist, creating...")
76-
return create_index!(table_name, memory, lists, probes)
77-
end
78-
79-
existing_index_age =
80-
DB
81-
.query_single(
82-
"SELECT pg_catalog.obj_description((:index_name)::regclass, 'pg_class');",
83-
index_name: index_name,
84-
)
85-
.first
86-
.to_i || 0
87-
new_rows =
88-
DB.query_single(
89-
"SELECT count(*) FROM #{table_name} WHERE model_id = #{id} AND strategy_id = #{@strategy.id} AND created_at > '#{Time.at(existing_index_age)}';",
90-
).first
91-
existing_lists = existing_index.match(/lists='(\d+)'/)&.captures&.first&.to_i
92-
93-
if existing_index_age > 0 &&
94-
existing_index_age <
95-
(
96-
if SiteSetting.ai_embeddings_semantic_related_topics_enabled
97-
1.hour.ago.to_i
98-
else
99-
1.day.ago.to_i
100-
end
101-
)
102-
if new_rows > 10_000
103-
Rails.logger.info(
104-
"Index #{index_name} is #{existing_index_age} seconds old, and there are #{new_rows} new rows, updating...",
105-
)
106-
return create_index!(table_name, memory, lists, probes)
107-
elsif existing_lists != lists
108-
Rails.logger.info(
109-
"Index #{index_name} already exists, but lists is #{existing_lists} instead of #{lists}, updating...",
110-
)
111-
return create_index!(table_name, memory, lists, probes)
112-
end
113-
end
114-
115-
Rails.logger.info(
116-
"Index #{index_name} kept. #{Time.now.to_i - existing_index_age} seconds old, #{new_rows} new rows, #{existing_lists} lists, #{probes} probes.",
117-
)
118-
end
119-
end
120-
121-
def create_index!(table_name, memory, lists, probes)
122-
tries = 0
123-
index_name = index_name(table_name)
124-
DB.exec("SET work_mem TO '#{memory}';")
125-
DB.exec("SET maintenance_work_mem TO '#{memory}';")
126-
begin
127-
DB.exec(<<~SQL)
128-
DROP INDEX IF EXISTS #{index_name};
129-
CREATE INDEX IF NOT EXISTS
130-
#{index_name}
131-
ON
132-
#{table_name}
133-
USING
134-
ivfflat ((embeddings::halfvec(#{dimensions})) #{pg_index_type})
135-
WITH
136-
(lists = #{lists})
137-
WHERE
138-
model_id = #{id} AND strategy_id = #{@strategy.id};
139-
SQL
140-
rescue PG::ProgramLimitExceeded => e
141-
parsed_error = e.message.match(/memory required is (\d+ [A-Z]{2}), ([a-z_]+)/)
142-
if parsed_error[1].present? && parsed_error[2].present?
143-
DB.exec("SET #{parsed_error[2]} TO '#{parsed_error[1].tr(" ", "")}';")
144-
tries += 1
145-
retry if tries < 3
146-
else
147-
raise e
148-
end
149-
end
150-
151-
DB.exec("COMMENT ON INDEX #{index_name} IS '#{Time.now.to_i}';")
152-
DB.exec("RESET work_mem;")
153-
DB.exec("RESET maintenance_work_mem;")
154-
end
155-
15649
def vector_from(text, asymetric: false)
15750
raise NotImplementedError
15851
end
@@ -224,14 +117,23 @@ def post_id_from_representation(raw_vector)
224117

225118
def asymmetric_topics_similarity_search(raw_vector, limit:, offset:, return_distance: false)
226119
results = DB.query(<<~SQL, query_embedding: raw_vector, limit: limit, offset: offset)
227-
#{probes_sql(topic_table_name)}
120+
WITH candidates AS (
121+
SELECT
122+
topic_id,
123+
embeddings::halfvec(#{dimensions}) AS embeddings
124+
FROM
125+
#{topic_table_name}
126+
WHERE
127+
model_id = #{id} AND strategy_id = #{@strategy.id}
128+
ORDER BY
129+
binary_quantize(embeddings)::bit(#{dimensions}) <~> binary_quantize('[:query_embedding]'::halfvec(#{dimensions}))
130+
LIMIT :limit * 2
131+
)
228132
SELECT
229133
topic_id,
230134
embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions}) AS distance
231135
FROM
232-
#{topic_table_name}
233-
WHERE
234-
model_id = #{id} AND strategy_id = #{@strategy.id}
136+
candidates
235137
ORDER BY
236138
embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions})
237139
LIMIT :limit
@@ -250,18 +152,23 @@ def asymmetric_topics_similarity_search(raw_vector, limit:, offset:, return_dist
250152

251153
def asymmetric_posts_similarity_search(raw_vector, limit:, offset:, return_distance: false)
252154
results = DB.query(<<~SQL, query_embedding: raw_vector, limit: limit, offset: offset)
253-
#{probes_sql(post_table_name)}
155+
WITH candidates AS (
156+
SELECT
157+
post_id,
158+
embeddings::halfvec(#{dimensions}) AS embeddings
159+
FROM
160+
#{post_table_name}
161+
WHERE
162+
model_id = #{id} AND strategy_id = #{@strategy.id}
163+
ORDER BY
164+
binary_quantize(embeddings)::bit(#{dimensions}) <~> binary_quantize('[:query_embedding]'::halfvec(#{dimensions}))
165+
LIMIT :limit * 2
166+
)
254167
SELECT
255168
post_id,
256169
embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions}) AS distance
257170
FROM
258-
#{post_table_name}
259-
INNER JOIN
260-
posts AS p ON p.id = post_id
261-
INNER JOIN
262-
topics AS t ON t.id = p.topic_id AND t.archetype = 'regular'
263-
WHERE
264-
model_id = #{id} AND strategy_id = #{@strategy.id}
171+
candidates
265172
ORDER BY
266173
embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions})
267174
LIMIT :limit
@@ -286,32 +193,41 @@ def asymmetric_rag_fragment_similarity_search(
286193
offset:,
287194
return_distance: false
288195
)
196+
# A too-low limit exacerbates the recall loss of binary quantization
197+
binary_search_limit = [limit * 2, 100].max
289198
results =
290199
DB.query(
291200
<<~SQL,
292-
#{probes_sql(post_table_name)}
293-
SELECT
294-
rag_document_fragment_id,
295-
embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions}) AS distance
296-
FROM
297-
#{rag_fragments_table_name}
298-
INNER JOIN
299-
rag_document_fragments AS rdf ON rdf.id = rag_document_fragment_id
300-
WHERE
301-
model_id = #{id} AND
302-
strategy_id = #{@strategy.id} AND
303-
rdf.target_id = :target_id AND
304-
rdf.target_type = :target_type
305-
ORDER BY
306-
embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions})
307-
LIMIT :limit
308-
OFFSET :offset
309-
SQL
201+
WITH candidates AS (
202+
SELECT
203+
rag_document_fragment_id,
204+
embeddings::halfvec(#{dimensions}) AS embeddings
205+
FROM
206+
#{rag_fragments_table_name}
207+
INNER JOIN
208+
rag_document_fragments ON rag_document_fragments.id = rag_document_fragment_id
209+
WHERE
210+
model_id = #{id} AND strategy_id = #{@strategy.id}
211+
ORDER BY
212+
binary_quantize(embeddings)::bit(#{dimensions}) <~> binary_quantize('[:query_embedding]'::halfvec(#{dimensions}))
213+
LIMIT :binary_search_limit
214+
)
215+
SELECT
216+
rag_document_fragment_id,
217+
embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions}) AS distance
218+
FROM
219+
candidates
220+
ORDER BY
221+
embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions})
222+
LIMIT :limit
223+
OFFSET :offset
224+
SQL
310225
query_embedding: raw_vector,
311226
target_id: target_id,
312227
target_type: target_type,
313228
limit: limit,
314229
offset: offset,
230+
binary_search_limit: binary_search_limit,
315231
)
316232

317233
if return_distance
@@ -326,17 +242,8 @@ def asymmetric_rag_fragment_similarity_search(
326242

327243
def symmetric_topics_similarity_search(topic)
328244
DB.query(<<~SQL, topic_id: topic.id).map(&:topic_id)
329-
#{probes_sql(topic_table_name)}
330-
SELECT
331-
topic_id
332-
FROM
333-
#{topic_table_name}
334-
WHERE
335-
model_id = #{id} AND
336-
strategy_id = #{@strategy.id}
337-
ORDER BY
338-
embeddings::halfvec(#{dimensions}) #{pg_function} (
339-
SELECT
245+
WITH le_target AS (
246+
SELECT
340247
embeddings
341248
FROM
342249
#{topic_table_name}
@@ -345,8 +252,34 @@ def symmetric_topics_similarity_search(topic)
345252
strategy_id = #{@strategy.id} AND
346253
topic_id = :topic_id
347254
LIMIT 1
348-
)::halfvec(#{dimensions})
349-
LIMIT 100
255+
)
256+
SELECT topic_id FROM (
257+
SELECT
258+
topic_id, embeddings
259+
FROM
260+
#{topic_table_name}
261+
WHERE
262+
model_id = #{id} AND
263+
strategy_id = #{@strategy.id}
264+
ORDER BY
265+
binary_quantize(embeddings)::bit(#{dimensions}) <~> (
266+
SELECT
267+
binary_quantize(embeddings)::bit(#{dimensions})
268+
FROM
269+
le_target
270+
LIMIT 1
271+
)
272+
LIMIT 200
273+
) AS widenet
274+
ORDER BY
275+
embeddings::halfvec(#{dimensions}) #{pg_function} (
276+
SELECT
277+
embeddings::halfvec(#{dimensions})
278+
FROM
279+
le_target
280+
LIMIT 1
281+
)
282+
LIMIT 100;
350283
SQL
351284
rescue PG::Error => e
352285
Rails.logger.error(
@@ -384,11 +317,6 @@ def index_name(table_name)
384317
"#{table_name}_#{id}_#{@strategy.id}_search"
385318
end
386319

387-
def probes_sql(table_name)
388-
probes = Discourse.cache.read("#{table_name}-#{id}-#{@strategy.id}-probes")
389-
probes.present? ? "SET LOCAL ivfflat.probes TO #{probes};" : ""
390-
end
391-
392320
def name
393321
raise NotImplementedError
394322
end

0 commit comments

Comments
 (0)