Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit a5a09fa

Browse files
committed
Drop usage of #symmetric_topics_similarity_search. Don't rely on stubs
1 parent 75ad967 commit a5a09fa

File tree

4 files changed

+19
-67
lines changed

4 files changed

+19
-67
lines changed

lib/embeddings/semantic_related.rb

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,10 @@ def related_topic_ids_for(topic)
1919
Discourse
2020
.cache
2121
.fetch(semantic_suggested_key(topic.id), expires_in: cache_for) do
22-
vector_rep
23-
.symmetric_topics_similarity_search(topic)
22+
DiscourseAi::Embeddings::Schema
23+
.for(Topic, vector: vector_rep)
24+
.symmetric_similarity_search(topic)
25+
.map(&:topic_id)
2426
.tap do |candidate_ids|
2527
# Happens when the topic doesn't have any embeddings
2628
# I'd rather not use Exceptions to control the flow, so this should be refactored soon

lib/embeddings/vector_representations/base.rb

Lines changed: 0 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -132,54 +132,6 @@ def post_id_from_representation(raw_vector)
132132
SQL
133133
end
134134

135-
def symmetric_topics_similarity_search(topic)
136-
DB.query(<<~SQL, topic_id: topic.id).map(&:topic_id)
137-
WITH le_target AS (
138-
SELECT
139-
embeddings
140-
FROM
141-
#{topic_table_name}
142-
WHERE
143-
model_id = #{id} AND
144-
strategy_id = #{@strategy.id} AND
145-
topic_id = :topic_id
146-
LIMIT 1
147-
)
148-
SELECT topic_id FROM (
149-
SELECT
150-
topic_id, embeddings
151-
FROM
152-
#{topic_table_name}
153-
WHERE
154-
model_id = #{id} AND
155-
strategy_id = #{@strategy.id}
156-
ORDER BY
157-
binary_quantize(embeddings)::bit(#{dimensions}) <~> (
158-
SELECT
159-
binary_quantize(embeddings)::bit(#{dimensions})
160-
FROM
161-
le_target
162-
LIMIT 1
163-
)
164-
LIMIT 200
165-
) AS widenet
166-
ORDER BY
167-
embeddings::halfvec(#{dimensions}) #{pg_function} (
168-
SELECT
169-
embeddings::halfvec(#{dimensions})
170-
FROM
171-
le_target
172-
LIMIT 1
173-
)
174-
LIMIT 100;
175-
SQL
176-
rescue PG::Error => e
177-
Rails.logger.error(
178-
"Error #{e} querying embeddings for topic #{topic.id} and model #{name}",
179-
)
180-
raise MissingEmbeddingError
181-
end
182-
183135
def topic_table_name
184136
"ai_topic_embeddings"
185137
end

spec/lib/modules/ai_bot/personas/persona_spec.rb

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,9 @@ def stub_fragments(fragment_count, persona: ai_persona)
445445
end
446446

447447
it "uses the re-ranker to reorder the fragments and pick the top 10 candidates" do
448-
expected_reranked = (0..14).to_a.reverse.map { |idx| { index: idx } }
448+
# The re-ranker reverses the similarity search order, but returns fewer results
449+
# to act as a limit for test purposes.
450+
expected_reranked = (4..14).to_a.reverse.map { |idx| { index: idx } }
449451

450452
WebMock.stub_request(:post, "https://test.reranker.com/rerank").to_return(
451453
status: 200,

spec/lib/modules/embeddings/semantic_topic_query_spec.rb

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,19 @@
99

1010
fab!(:target) { Fabricate(:topic) }
1111

12-
def stub_semantic_search_with(results)
13-
DiscourseAi::Embeddings::VectorRepresentations::BgeLargeEn
14-
.any_instance
15-
.expects(:symmetric_topics_similarity_search)
16-
.returns(results.concat([target.id]))
12+
def seed_embeddings(topics)
13+
schema = DiscourseAi::Embeddings::Schema.for(Topic)
14+
15+
embeddings = [1] * 1024
16+
(topics << target).each { |t| schema.store(t, embeddings, "digest") }
1717
end
1818

1919
after { DiscourseAi::Embeddings::SemanticRelated.clear_cache_for(target) }
2020

2121
context "when the semantic search returns an unlisted topic" do
2222
fab!(:unlisted_topic) { Fabricate(:topic, visible: false) }
2323

24-
before { stub_semantic_search_with([unlisted_topic.id]) }
24+
before { seed_embeddings([unlisted_topic]) }
2525

2626
it "filters it out" do
2727
expect(topic_query.list_semantic_related_topics(target).topics).to be_empty
@@ -31,7 +31,7 @@ def stub_semantic_search_with(results)
3131
context "when the semantic search returns a private topic" do
3232
fab!(:private_topic) { Fabricate(:private_message_topic) }
3333

34-
before { stub_semantic_search_with([private_topic.id]) }
34+
before { seed_embeddings([private_topic]) }
3535

3636
it "filters it out" do
3737
expect(topic_query.list_semantic_related_topics(target).topics).to be_empty
@@ -43,7 +43,7 @@ def stub_semantic_search_with(results)
4343
fab!(:category) { Fabricate(:private_category, group: group) }
4444
fab!(:secured_category_topic) { Fabricate(:topic, category: category) }
4545

46-
before { stub_semantic_search_with([secured_category_topic.id]) }
46+
before { seed_embeddings([secured_category_topic]) }
4747

4848
it "filters it out" do
4949
expect(topic_query.list_semantic_related_topics(target).topics).to be_empty
@@ -63,7 +63,7 @@ def stub_semantic_search_with(results)
6363

6464
before do
6565
SiteSetting.ai_embeddings_semantic_related_include_closed_topics = false
66-
stub_semantic_search_with([closed_topic.id])
66+
seed_embeddings([closed_topic])
6767
end
6868

6969
it "filters it out" do
@@ -80,7 +80,7 @@ def stub_semantic_search_with(results)
8080
category_id: category.id,
8181
notification_level: CategoryUser.notification_levels[:muted],
8282
)
83-
stub_semantic_search_with([topic.id])
83+
seed_embeddings([topic])
8484
expect(topic_query.list_semantic_related_topics(target).topics).not_to include(topic)
8585
end
8686
end
@@ -91,11 +91,7 @@ def stub_semantic_search_with(results)
9191
fab!(:normal_topic_3) { Fabricate(:topic) }
9292
fab!(:closed_topic) { Fabricate(:topic, closed: true) }
9393

94-
before do
95-
stub_semantic_search_with(
96-
[closed_topic.id, normal_topic_1.id, normal_topic_2.id, normal_topic_3.id],
97-
)
98-
end
94+
before { seed_embeddings([closed_topic, normal_topic_1, normal_topic_2, normal_topic_3]) }
9995

10096
it "filters it out" do
10197
expect(topic_query.list_semantic_related_topics(target).topics).to eq(
@@ -117,7 +113,7 @@ def stub_semantic_search_with(results)
117113
fab!(:included_topic) { Fabricate(:topic) }
118114
fab!(:excluded_topic) { Fabricate(:topic) }
119115

120-
before { stub_semantic_search_with([included_topic.id, excluded_topic.id]) }
116+
before { seed_embeddings([included_topic, excluded_topic]) }
121117

122118
let(:modifier_block) { Proc.new { |query| query.where.not(id: excluded_topic.id) } }
123119

0 commit comments

Comments
 (0)