@@ -46,113 +46,6 @@ def initialize(strategy)
4646 @strategy = strategy
4747 end
4848
# Decides, for the topic table and then the post table, whether the ivfflat
# search index has to be created or rebuilt, delegating the work to
# create_index!.
#
# For each table it:
#   1. counts the rows belonging to this model/strategy and derives the
#      ivfflat `lists` and `probes` values from that count;
#   2. writes `probes` to Discourse.cache under
#      "<table>-<id>-<strategy id>-probes";
#   3. creates the index when pg_indexes has no entry for it;
#   4. rebuilds it when the build timestamp stored in the index comment is
#      older than the cutoff (1 hour if
#      SiteSetting.ai_embeddings_semantic_related_topics_enabled, else 1 day)
#      and either more than 10_000 rows were added since, or `lists` changed;
#   5. otherwise logs that the index was kept.
#
# memory - value forwarded to create_index! (defaults to "100MB").
#
# Every table is processed on each call: (re)building one index moves on to
# the next table instead of returning from the method.
def consider_indexing(memory: "100MB")
  [topic_table_name, post_table_name].each do |table_name|
    index_name = index_name(table_name)
    # Using extension maintainer's recommendation for ivfflat indexes
    # Results are not as good as without indexes, but it's much faster
    # Disk usage is ~1x the size of the table, so this doubles table total size
    count =
      DB.query_single(
        "SELECT count(*) FROM #{table_name} WHERE model_id = #{id} AND strategy_id = #{@strategy.id};",
      ).first
    lists = [count < 1_000_000 ? count / 1000 : Math.sqrt(count).to_i, 10].max
    probes = [count < 1_000_000 ? lists / 10 : Math.sqrt(lists).to_i, 1].max
    Discourse.cache.write("#{table_name}-#{id}-#{@strategy.id}-probes", probes)

    existing_index = DB.query_single(<<~SQL, index_name: index_name).first
      SELECT
        indexdef
      FROM
        pg_indexes
      WHERE
        indexname = :index_name
        AND schemaname = 'public'
      LIMIT 1
    SQL

    if existing_index.blank?
      Rails.logger.info("Index #{index_name} does not exist, creating...")
      create_index!(table_name, memory, lists, probes)
      # `next`, not `return`: a `return` here would leave the method and the
      # remaining table would never be considered.
      next
    end

    # create_index! stores the build time (epoch seconds) as the index comment.
    # A missing comment yields nil, and nil.to_i is 0.
    built_at =
      DB
        .query_single(
          "SELECT pg_catalog.obj_description((:index_name)::regclass, 'pg_class');",
          index_name: index_name,
        )
        .first
        .to_i
    index_age = Time.now.to_i - built_at
    new_rows =
      DB.query_single(
        "SELECT count(*) FROM #{table_name} WHERE model_id = #{id} AND strategy_id = #{@strategy.id} AND created_at > '#{Time.at(built_at)}';",
      ).first
    existing_lists = existing_index.match(/lists='(\d+)'/)&.captures&.first&.to_i

    stale_before =
      if SiteSetting.ai_embeddings_semantic_related_topics_enabled
        1.hour.ago.to_i
      else
        1.day.ago.to_i
      end

    # An index without a recorded build time (built_at == 0) is never rebuilt here.
    if built_at > 0 && built_at < stale_before
      if new_rows > 10_000
        Rails.logger.info(
          "Index #{index_name} is #{index_age} seconds old, and there are #{new_rows} new rows, updating...",
        )
        create_index!(table_name, memory, lists, probes)
        next
      elsif existing_lists != lists
        Rails.logger.info(
          "Index #{index_name} already exists, but lists is #{existing_lists} instead of #{lists}, updating...",
        )
        create_index!(table_name, memory, lists, probes)
        next
      end
    end

    Rails.logger.info(
      "Index #{index_name} kept. #{index_age} seconds old, #{new_rows} new rows, #{existing_lists} lists, #{probes} probes.",
    )
  end
end
120-
# Drops and recreates the partial ivfflat index for +table_name+, then stamps
# it with the current epoch time as an index comment.
#
# table_name - table to index; the index name comes from index_name(table_name).
# memory     - value applied to both work_mem and maintenance_work_mem for the build.
# lists      - ivfflat `lists` storage parameter.
# probes     - accepted for interface compatibility; not used by this method.
#
# When the build fails with PG::ProgramLimitExceeded and the message names the
# memory it needs ("memory required is <N UU>, <setting>"), that setting is
# raised to the requested amount and the build is retried, for at most three
# attempts in total.
#
# Raises PG::ProgramLimitExceeded when the message cannot be parsed or when the
# third attempt still fails, so a failed build is never stamped as successful.
# work_mem and maintenance_work_mem are reset on every exit path.
def create_index!(table_name, memory, lists, probes)
  idx = index_name(table_name)
  attempts = 0

  DB.exec("SET work_mem TO '#{memory}';")
  DB.exec("SET maintenance_work_mem TO '#{memory}';")

  begin
    begin
      DB.exec(<<~SQL)
        DROP INDEX IF EXISTS #{idx};
        CREATE INDEX IF NOT EXISTS
          #{idx}
        ON
          #{table_name}
        USING
          ivfflat ((embeddings::halfvec(#{dimensions})) #{pg_index_type})
        WITH
          (lists = #{lists})
        WHERE
          model_id = #{id} AND strategy_id = #{@strategy.id};
      SQL
    rescue PG::ProgramLimitExceeded => e
      required = e.message.match(/memory required is (\d+ [A-Z]{2}), ([a-z_]+)/)
      attempts += 1
      # Re-raise the database error itself: an unparseable message must not turn
      # into a NoMethodError on nil, and exhausted retries must not be swallowed.
      raise if required.nil? || attempts >= 3

      # Postgres reports e.g. "200 MB"; the setting value is written without the space.
      DB.exec("SET #{required[2]} TO '#{required[1].tr(" ", "")}';")
      retry
    end

    # Only reached after a successful build. consider_indexing reads this
    # comment back as the index build time.
    DB.exec("COMMENT ON INDEX #{idx} IS '#{Time.now.to_i}';")
  ensure
    DB.exec("RESET work_mem;")
    DB.exec("RESET maintenance_work_mem;")
  end
end
155-
# Abstract hook: this implementation only raises, so a concrete class is
# expected to provide its own version.
#
# text      - positional argument; unused here.
# asymetric - keyword argument, defaults to false; unused here. The spelling
#             is part of the method signature.
#
# Raises NotImplementedError unconditionally.
def vector_from(text, asymetric: false)
  raise(NotImplementedError)
end
@@ -224,14 +117,23 @@ def post_id_from_representation(raw_vector)
224117
225118 def asymmetric_topics_similarity_search ( raw_vector , limit :, offset :, return_distance : false )
226119 results = DB . query ( <<~SQL , query_embedding : raw_vector , limit : limit , offset : offset )
227- #{ probes_sql ( topic_table_name ) }
120+ WITH candidates AS (
121+ SELECT
122+ topic_id,
123+ embeddings::halfvec(#{ dimensions } ) AS embeddings
124+ FROM
125+ #{ topic_table_name }
126+ WHERE
127+ model_id = #{ id } AND strategy_id = #{ @strategy . id }
128+ ORDER BY
129+ binary_quantize(embeddings)::bit(#{ dimensions } ) <~> binary_quantize('[:query_embedding]'::halfvec(#{ dimensions } ))
130+ LIMIT :limit * 2
131+ )
228132 SELECT
229133 topic_id,
230134 embeddings::halfvec(#{ dimensions } ) #{ pg_function } '[:query_embedding]'::halfvec(#{ dimensions } ) AS distance
231135 FROM
232- #{ topic_table_name }
233- WHERE
234- model_id = #{ id } AND strategy_id = #{ @strategy . id }
136+ candidates
235137 ORDER BY
236138 embeddings::halfvec(#{ dimensions } ) #{ pg_function } '[:query_embedding]'::halfvec(#{ dimensions } )
237139 LIMIT :limit
@@ -250,18 +152,23 @@ def asymmetric_topics_similarity_search(raw_vector, limit:, offset:, return_dist
250152
251153 def asymmetric_posts_similarity_search ( raw_vector , limit :, offset :, return_distance : false )
252154 results = DB . query ( <<~SQL , query_embedding : raw_vector , limit : limit , offset : offset )
253- #{ probes_sql ( post_table_name ) }
155+ WITH candidates AS (
156+ SELECT
157+ post_id,
158+ embeddings::halfvec(#{ dimensions } ) AS embeddings
159+ FROM
160+ #{ post_table_name }
161+ WHERE
162+ model_id = #{ id } AND strategy_id = #{ @strategy . id }
163+ ORDER BY
164+ binary_quantize(embeddings)::bit(#{ dimensions } ) <~> binary_quantize('[:query_embedding]'::halfvec(#{ dimensions } ))
165+ LIMIT :limit * 2
166+ )
254167 SELECT
255168 post_id,
256169 embeddings::halfvec(#{ dimensions } ) #{ pg_function } '[:query_embedding]'::halfvec(#{ dimensions } ) AS distance
257170 FROM
258- #{ post_table_name }
259- INNER JOIN
260- posts AS p ON p.id = post_id
261- INNER JOIN
262- topics AS t ON t.id = p.topic_id AND t.archetype = 'regular'
263- WHERE
264- model_id = #{ id } AND strategy_id = #{ @strategy . id }
171+ candidates
265172 ORDER BY
266173 embeddings::halfvec(#{ dimensions } ) #{ pg_function } '[:query_embedding]'::halfvec(#{ dimensions } )
267174 LIMIT :limit
@@ -286,32 +193,41 @@ def asymmetric_rag_fragment_similarity_search(
286193 offset :,
287194 return_distance : false
288195 )
196+ # A too low limit exacerbates the recall loss of binary quantization
197+ binary_search_limit = [ limit * 2 , 100 ] . max
289198 results =
290199 DB . query (
291200 <<~SQL ,
292- #{ probes_sql ( post_table_name ) }
293- SELECT
294- rag_document_fragment_id,
295- embeddings::halfvec(#{ dimensions } ) #{ pg_function } '[:query_embedding]'::halfvec(#{ dimensions } ) AS distance
296- FROM
297- #{ rag_fragments_table_name }
298- INNER JOIN
299- rag_document_fragments AS rdf ON rdf.id = rag_document_fragment_id
300- WHERE
301- model_id = #{ id } AND
302- strategy_id = #{ @strategy . id } AND
303- rdf.target_id = :target_id AND
304- rdf.target_type = :target_type
305- ORDER BY
306- embeddings::halfvec(#{ dimensions } ) #{ pg_function } '[:query_embedding]'::halfvec(#{ dimensions } )
307- LIMIT :limit
308- OFFSET :offset
309- SQL
201+ WITH candidates AS (
202+ SELECT
203+ rag_document_fragment_id,
204+ embeddings::halfvec(#{ dimensions } ) AS embeddings
205+ FROM
206+ #{ rag_fragments_table_name }
207+ INNER JOIN
208+ rag_document_fragments ON rag_document_fragments.id = rag_document_fragment_id
209+ WHERE
210+ model_id = #{ id } AND strategy_id = #{ @strategy . id }
211+ ORDER BY
212+ binary_quantize(embeddings)::bit(#{ dimensions } ) <~> binary_quantize('[:query_embedding]'::halfvec(#{ dimensions } ))
213+ LIMIT :binary_search_limit
214+ )
215+ SELECT
216+ rag_document_fragment_id,
217+ embeddings::halfvec(#{ dimensions } ) #{ pg_function } '[:query_embedding]'::halfvec(#{ dimensions } ) AS distance
218+ FROM
219+ candidates
220+ ORDER BY
221+ embeddings::halfvec(#{ dimensions } ) #{ pg_function } '[:query_embedding]'::halfvec(#{ dimensions } )
222+ LIMIT :limit
223+ OFFSET :offset
224+ SQL
310225 query_embedding : raw_vector ,
311226 target_id : target_id ,
312227 target_type : target_type ,
313228 limit : limit ,
314229 offset : offset ,
230+ binary_search_limit : binary_search_limit ,
315231 )
316232
317233 if return_distance
@@ -326,17 +242,8 @@ def asymmetric_rag_fragment_similarity_search(
326242
327243 def symmetric_topics_similarity_search ( topic )
328244 DB . query ( <<~SQL , topic_id : topic . id ) . map ( &:topic_id )
329- #{ probes_sql ( topic_table_name ) }
330- SELECT
331- topic_id
332- FROM
333- #{ topic_table_name }
334- WHERE
335- model_id = #{ id } AND
336- strategy_id = #{ @strategy . id }
337- ORDER BY
338- embeddings::halfvec(#{ dimensions } ) #{ pg_function } (
339- SELECT
245+ WITH le_target AS (
246+ SELECT
340247 embeddings
341248 FROM
342249 #{ topic_table_name }
@@ -345,8 +252,34 @@ def symmetric_topics_similarity_search(topic)
345252 strategy_id = #{ @strategy . id } AND
346253 topic_id = :topic_id
347254 LIMIT 1
348- )::halfvec(#{ dimensions } )
349- LIMIT 100
255+ )
256+ SELECT topic_id FROM (
257+ SELECT
258+ topic_id, embeddings
259+ FROM
260+ #{ topic_table_name }
261+ WHERE
262+ model_id = #{ id } AND
263+ strategy_id = #{ @strategy . id }
264+ ORDER BY
265+ binary_quantize(embeddings)::bit(#{ dimensions } ) <~> (
266+ SELECT
267+ binary_quantize(embeddings)::bit(#{ dimensions } )
268+ FROM
269+ le_target
270+ LIMIT 1
271+ )
272+ LIMIT 200
273+ ) AS widenet
274+ ORDER BY
275+ embeddings::halfvec(#{ dimensions } ) #{ pg_function } (
276+ SELECT
277+ embeddings::halfvec(#{ dimensions } )
278+ FROM
279+ le_target
280+ LIMIT 1
281+ )
282+ LIMIT 100;
350283 SQL
351284 rescue PG ::Error => e
352285 Rails . logger . error (
@@ -384,11 +317,6 @@ def index_name(table_name)
384317 "#{ table_name } _#{ id } _#{ @strategy . id } _search"
385318 end
386319
# Builds the SQL prefix that sets ivfflat.probes for the current transaction.
#
# table_name - table whose cached probes value is looked up; the cache key is
#              "<table_name>-<id>-<strategy id>-probes".
#
# Returns "SET LOCAL ivfflat.probes TO <probes>;" when a value is cached, and
# an empty string otherwise.
def probes_sql(table_name)
  cache_key = "#{table_name}-#{id}-#{@strategy.id}-probes"
  cached_probes = Discourse.cache.read(cache_key)
  return "" unless cached_probes.present?

  "SET LOCAL ivfflat.probes TO #{cached_probes};"
end
391-
# Abstract hook: this implementation only raises, so a concrete class is
# expected to provide its own version.
#
# Raises NotImplementedError unconditionally.
def name
  raise(NotImplementedError)
end
0 commit comments