Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 75ad967

Browse files
committed
Drop usage of #asymmetric_rag_fragment_similarity_search. Don't rely on stubs
1 parent c4148ba commit 75ad967

File tree

6 files changed

+73
-109
lines changed

6 files changed

+73
-109
lines changed

app/models/rag_document_fragment.rb

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -39,9 +39,7 @@ def update_target_uploads(target, upload_ids)
3939
end
4040

4141
def indexing_status(persona, uploads)
42-
vector_rep = DiscourseAi::Embeddings::VectorRepresentations::Base.current_representation
43-
44-
embeddings_table = vector_rep.rag_fragments_table_name
42+
embeddings_table = DiscourseAi::Embeddings::Schema.for(self).table
4543

4644
results =
4745
DB.query(

lib/ai_bot/personas/persona.rb

Lines changed: 20 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -320,22 +320,28 @@ def rag_fragments_prompt(conversation_context, llm:, user:)
320320
interactions_vector = vector_rep.vector_from(consolidated_question)
321321

322322
rag_conversation_chunks = self.class.rag_conversation_chunks
323+
search_limit =
324+
if reranker.reranker_configured?
325+
rag_conversation_chunks * 5
326+
else
327+
rag_conversation_chunks
328+
end
329+
330+
schema = DiscourseAi::Embeddings::Schema.for(RagDocumentFragment, vector: vector_rep)
323331

324332
candidate_fragment_ids =
325-
vector_rep.asymmetric_rag_fragment_similarity_search(
326-
interactions_vector,
327-
target_type: "AiPersona",
328-
target_id: id,
329-
limit:
330-
(
331-
if reranker.reranker_configured?
332-
rag_conversation_chunks * 5
333-
else
334-
rag_conversation_chunks
335-
end
336-
),
337-
offset: 0,
338-
)
333+
schema
334+
.asymmetric_similarity_search(
335+
interactions_vector,
336+
limit: search_limit,
337+
offset: 0,
338+
) { |builder| builder.join(<<~SQL, target_id: id, target_type: "AiPersona") }
339+
rag_document_fragments ON
340+
rag_document_fragments.id = rag_document_fragment_id AND
341+
rag_document_fragments.target_id = :target_id AND
342+
rag_document_fragments.target_type = :target_type
343+
SQL
344+
.map(&:rag_document_fragment_id)
339345

340346
fragments =
341347
RagDocumentFragment.where(upload_id: upload_refs, id: candidate_fragment_ids).pluck(

lib/ai_bot/tool_runner.rb

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -144,13 +144,17 @@ def rag_search(query, filenames: nil, limit: 10)
144144
vector_rep = DiscourseAi::Embeddings::VectorRepresentations::Base.current_representation
145145
query_vector = vector_rep.vector_from(query)
146146
fragment_ids =
147-
vector_rep.asymmetric_rag_fragment_similarity_search(
148-
query_vector,
149-
target_type: "AiTool",
150-
target_id: tool.id,
151-
limit: limit,
152-
offset: 0,
153-
)
147+
DiscourseAi::Embeddings::Schema
148+
.for(RagDocumentFragment, vector: vector_rep)
149+
.asymmetric_similarity_search(query_vector, limit: limit, offset: 0) do |builder|
150+
builder.join(<<~SQL, target_id: tool.id, target_type: "AiTool")
151+
rag_document_fragments ON
152+
rag_document_fragments.id = rag_document_fragment_id AND
153+
rag_document_fragments.target_id = :target_id AND
154+
rag_document_fragments.target_type = :target_type
155+
SQL
156+
end
157+
.map(&:rag_document_fragment_id)
154158

155159
fragments =
156160
RagDocumentFragment.where(id: fragment_ids, upload_id: upload_refs).pluck(

lib/embeddings/schema.rb

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ def asymmetric_similarity_search(embedding, limit:, offset:)
7272
/*where*/
7373
ORDER BY
7474
binary_quantize(embeddings)::bit(#{dimensions}) <~> binary_quantize('[:query_embedding]'::halfvec(#{dimensions}))
75-
LIMIT :limit * 2
75+
LIMIT :candidates_limit
7676
)
7777
SELECT
7878
#{target_column},
@@ -93,7 +93,19 @@ def asymmetric_similarity_search(embedding, limit:, offset:)
9393

9494
yield(builder) if block_given?
9595

96-
builder.query(query_embedding: embedding, limit: limit, offset: offset)
96+
if table == RAG_DOCS_TABLE
97+
# A too low limit exacerbates the recall loss of binary quantization
98+
candidates_limit = [limit * 2, 100].max
99+
else
100+
candidates_limit = limit * 2
101+
end
102+
103+
builder.query(
104+
query_embedding: embedding,
105+
candidates_limit: candidates_limit,
106+
limit: limit,
107+
offset: offset,
108+
)
97109
rescue PG::Error => e
98110
Rails.logger.error("Error #{e} querying embeddings for model #{name}")
99111
raise MissingEmbeddingError

lib/embeddings/vector_representations/base.rb

Lines changed: 0 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -132,64 +132,6 @@ def post_id_from_representation(raw_vector)
132132
SQL
133133
end
134134

135-
def asymmetric_rag_fragment_similarity_search(
136-
raw_vector,
137-
target_id:,
138-
target_type:,
139-
limit:,
140-
offset:,
141-
return_distance: false
142-
)
143-
# A too low limit exacerbates the the recall loss of binary quantization
144-
binary_search_limit = [limit * 2, 100].max
145-
results =
146-
DB.query(
147-
<<~SQL,
148-
WITH candidates AS (
149-
SELECT
150-
rag_document_fragment_id,
151-
embeddings::halfvec(#{dimensions}) AS embeddings
152-
FROM
153-
#{rag_fragments_table_name}
154-
INNER JOIN
155-
rag_document_fragments ON
156-
rag_document_fragments.id = rag_document_fragment_id AND
157-
rag_document_fragments.target_id = :target_id AND
158-
rag_document_fragments.target_type = :target_type
159-
WHERE
160-
model_id = #{id} AND strategy_id = #{@strategy.id}
161-
ORDER BY
162-
binary_quantize(embeddings)::bit(#{dimensions}) <~> binary_quantize('[:query_embedding]'::halfvec(#{dimensions}))
163-
LIMIT :binary_search_limit
164-
)
165-
SELECT
166-
rag_document_fragment_id,
167-
embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions}) AS distance
168-
FROM
169-
candidates
170-
ORDER BY
171-
embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions})
172-
LIMIT :limit
173-
OFFSET :offset
174-
SQL
175-
query_embedding: raw_vector,
176-
target_id: target_id,
177-
target_type: target_type,
178-
limit: limit,
179-
offset: offset,
180-
binary_search_limit: binary_search_limit,
181-
)
182-
183-
if return_distance
184-
results.map { |r| [r.rag_document_fragment_id, r.distance] }
185-
else
186-
results.map(&:rag_document_fragment_id)
187-
end
188-
rescue PG::Error => e
189-
Rails.logger.error("Error #{e} querying embeddings for model #{name}")
190-
raise MissingEmbeddingError
191-
end
192-
193135
def symmetric_topics_similarity_search(topic)
194136
DB.query(<<~SQL, topic_id: topic.id).map(&:topic_id)
195137
WITH le_target AS (

spec/lib/modules/ai_bot/personas/persona_spec.rb

Lines changed: 27 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -373,41 +373,44 @@ def system_prompt
373373
end
374374

375375
context "when a persona has RAG uploads" do
376-
def stub_fragments(limit, expected_limit: nil)
377-
candidate_ids = []
378-
379-
limit.times do |i|
380-
candidate_ids << Fabricate(
381-
:rag_document_fragment,
382-
fragment: "fragment-n#{i}",
383-
target_id: ai_persona.id,
384-
target_type: "AiPersona",
385-
upload: upload,
386-
).id
387-
end
376+
let(:vector_rep) do
377+
DiscourseAi::Embeddings::VectorRepresentations::Base.current_representation
378+
end
379+
let(:embedding_value) { 0.04381 }
380+
let(:prompt_cc_embeddings) { [embedding_value] * vector_rep.dimensions }
381+
382+
def stub_fragments(fragment_count, persona: ai_persona)
383+
schema = DiscourseAi::Embeddings::Schema.for(RagDocumentFragment, vector: vector_rep)
384+
385+
fragment_count.times do |i|
386+
fragment =
387+
Fabricate(
388+
:rag_document_fragment,
389+
fragment: "fragment-n#{i}",
390+
target_id: persona.id,
391+
target_type: "AiPersona",
392+
upload: upload,
393+
)
388394

389-
DiscourseAi::Embeddings::VectorRepresentations::BgeLargeEn
390-
.any_instance
391-
.expects(:asymmetric_rag_fragment_similarity_search)
392-
.with { |args, kwargs| kwargs[:limit] == (expected_limit || limit) }
393-
.returns(candidate_ids)
395+
# Similarity is determined left-to-right.
396+
embeddings = [embedding_value + "0.000#{i}".to_f] * vector_rep.dimensions
397+
398+
schema.store(fragment, embeddings, "test")
399+
end
394400
end
395401

396402
before do
397403
stored_ai_persona = AiPersona.find(ai_persona.id)
398404
UploadReference.ensure_exist!(target: stored_ai_persona, upload_ids: [upload.id])
399405

400-
context_embedding = [0.049382, 0.9999]
401406
EmbeddingsGenerationStubs.discourse_service(
402407
SiteSetting.ai_embeddings_model,
403408
with_cc.dig(:conversation_context, 0, :content),
404-
context_embedding,
409+
prompt_cc_embeddings,
405410
)
406411
end
407412

408413
context "when persona allows for less fragments" do
409-
before { stub_fragments(3) }
410-
411414
it "will only pick 3 fragments" do
412415
custom_ai_persona =
413416
Fabricate(
@@ -417,6 +420,8 @@ def stub_fragments(limit, expected_limit: nil)
417420
allowed_group_ids: [Group::AUTO_GROUPS[:trust_level_0]],
418421
)
419422

423+
stub_fragments(3, persona: custom_ai_persona)
424+
420425
UploadReference.ensure_exist!(target: custom_ai_persona, upload_ids: [upload.id])
421426

422427
custom_persona =
@@ -436,13 +441,10 @@ def stub_fragments(limit, expected_limit: nil)
436441
context "when the reranker is available" do
437442
before do
438443
SiteSetting.ai_hugging_face_tei_reranker_endpoint = "https://test.reranker.com"
439-
440-
# hard coded internal implementation, reranker takes x5 number of chunks
441-
stub_fragments(15, expected_limit: 50) # Mimic limit being more than 10 results
444+
stub_fragments(15)
442445
end
443446

444447
it "uses the re-ranker to reorder the fragments and pick the top 10 candidates" do
445-
skip "This test is flaky needs to be investigated ordering does not come back as expected"
446448
expected_reranked = (0..14).to_a.reverse.map { |idx| { index: idx } }
447449

448450
WebMock.stub_request(:post, "https://test.reranker.com/rerank").to_return(

0 commit comments

Comments (0)