@@ -15,6 +15,8 @@ class Schema
1515 EMBEDDING_TARGETS = %w[ topics posts document_fragments ]
1616 EMBEDDING_TABLES = [ TOPICS_TABLE , POSTS_TABLE , RAG_DOCS_TABLE ]
1717
18+ DEFAULT_HNSW_EF_SEARCH = 40
19+
1820 MissingEmbeddingError = Class . new ( StandardError )
1921
2022 class << self
@@ -132,6 +134,8 @@ def find_by_target(target)
132134 end
133135
134136 def asymmetric_similarity_search ( embedding , limit :, offset :)
137+ before_query = hnsw_search_workaround ( limit )
138+
135139 builder = DB . build ( <<~SQL )
136140 WITH candidates AS (
137141 SELECT
@@ -153,7 +157,7 @@ def asymmetric_similarity_search(embedding, limit:, offset:)
153157 ORDER BY
154158 embeddings::halfvec(#{ dimensions } ) #{ pg_function } '[:query_embedding]'::halfvec(#{ dimensions } )
155159 LIMIT :limit
156- OFFSET :offset
160+ OFFSET :offset;
157161 SQL
158162
159163 builder . where (
@@ -171,18 +175,24 @@ def asymmetric_similarity_search(embedding, limit:, offset:)
171175 candidates_limit = limit * 2
172176 end
173177
174- builder . query (
175- query_embedding : embedding ,
176- candidates_limit : candidates_limit ,
177- limit : limit ,
178- offset : offset ,
179- )
178+ ActiveRecord ::Base . transaction do
179+ DB . exec ( before_query ) if before_query . present?
180+ builder . query (
181+ query_embedding : embedding ,
182+ candidates_limit : candidates_limit ,
183+ limit : limit ,
184+ offset : offset ,
185+ )
186+ end
180187 rescue PG ::Error => e
181188 Rails . logger . error ( "Error #{ e } querying embeddings for model #{ vector_def . display_name } " )
182189 raise MissingEmbeddingError
183190 end
184191
185192 def symmetric_similarity_search ( record )
193+ limit = 200
194+ before_query = hnsw_search_workaround ( limit )
195+
186196 builder = DB . build ( <<~SQL )
187197 WITH le_target AS (
188198 SELECT
@@ -210,7 +220,7 @@ def symmetric_similarity_search(record)
210220 le_target
211221 LIMIT 1
212222 )
213- LIMIT 200
223+ LIMIT #{ limit }
214224 ) AS widenet
215225 ORDER BY
216226 embeddings::halfvec(#{ dimensions } ) #{ pg_function } (
@@ -220,14 +230,17 @@ def symmetric_similarity_search(record)
220230 le_target
221231 LIMIT 1
222232 )
223- LIMIT 100 ;
233+ LIMIT #{ limit / 2 } ;
224234 SQL
225235
226236 builder . where ( "model_id = :vid AND strategy_id = :vsid" )
227237
228238 yield ( builder ) if block_given?
229239
230- builder . query ( vid : vector_def . id , vsid : vector_def . strategy_id , target_id : record . id )
240+ ActiveRecord ::Base . transaction do
241+ DB . exec ( before_query ) if before_query . present?
242+ builder . query ( vid : vector_def . id , vsid : vector_def . strategy_id , target_id : record . id )
243+ end
231244 rescue PG ::Error => e
232245 Rails . logger . error ( "Error #{ e } querying embeddings for model #{ vector_def . display_name } " )
233246 raise MissingEmbeddingError
@@ -259,6 +272,13 @@ def store(record, embedding, digest)
259272
260273 private
261274
# Builds the SQL statement that raises pgvector's `hnsw.ef_search` setting
# for a similarity query, or returns "" when no override is needed.
#
# The requested value is twice `limit`. When that is below
# DEFAULT_HNSW_EF_SEARCH the setting is left alone. Callers run the
# returned statement inside the same transaction as the search, which is
# why it uses SET LOCAL.
#
# @param limit [Integer] number of rows the search intends to return
# @return [String] a `SET LOCAL hnsw.ef_search = N;` statement, or ""
def hnsw_search_workaround(limit)
  # pgvector only accepts hnsw.ef_search values up to 1000; an out-of-range
  # SET raises PG::Error and would fail the whole search, so clamp instead.
  max_ef_search = 1000
  threshold = limit * 2

  return "" if threshold < DEFAULT_HNSW_EF_SEARCH

  "SET LOCAL hnsw.ef_search = #{[threshold, max_ef_search].min};"
end
281+
262282 delegate :dimensions , :pg_function , to : :vector_def
263283 end
264284 end
0 commit comments