Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions app/jobs/scheduled/embeddings_backfill.rb
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def execute(args)
Post
.joins("LEFT JOIN #{table_name} ON #{table_name}.post_id = posts.id")
.where(deleted_at: nil)
.where(post_type: Post.types[:regular])
.limit(limit - rebaked)

# First, we'll try to backfill embeddings for posts that have none
Expand Down
27 changes: 15 additions & 12 deletions lib/embeddings/vector_representations/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,18 +61,21 @@ def gen_bulk_reprensentations(relation)

embedding_gen = inference_client
promised_embeddings =
relation.map do |record|
materials = { target: record, text: prepare_text(record) }

Concurrent::Promises
.fulfilled_future(materials, pool)
.then_on(pool) do |w_prepared_text|
w_prepared_text.merge(
embedding: embedding_gen.perform!(w_prepared_text[:text]),
digest: OpenSSL::Digest::SHA1.hexdigest(w_prepared_text[:text]),
)
end
end
relation
.map do |record|
prepared_text = prepare_text(record)
next if prepared_text.blank?

Concurrent::Promises
.fulfilled_future({ target: record, text: prepared_text }, pool)
.then_on(pool) do |w_prepared_text|
w_prepared_text.merge(
embedding: embedding_gen.perform!(w_prepared_text[:text]),
digest: OpenSSL::Digest::SHA1.hexdigest(w_prepared_text[:text]),
)
end
end
.compact

Concurrent::Promises
.zip(*promised_embeddings)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,13 @@ def inference_client
end

# Builds the text to embed for +record+, prefixing it with "query: " when the
# inference backend is a DiscourseClassifier model (those models expect the
# asymmetric "query: " instruction prefix on inputs).
#
# Note: the scraped diff conflated the old and new versions of this method
# (calling `super(record)` twice and guarding before preparing the text);
# this is the resolved post-merge implementation, which calls `super` once
# and returns the prepared text unchanged — possibly blank — when the
# prefix does not apply, so callers can skip blank inputs.
#
# @param record [ActiveRecord::Base] topic/post being embedded
# @return [String, nil] text ready for the inference client
def prepare_text(record)
  prepared_text = super(record)

  # Only prefix non-blank text; a blank result must stay blank so the
  # bulk-generation path can skip the record entirely.
  if prepared_text.present? && inference_client.class.name.include?("DiscourseClassifier")
    return "query: #{prepared_text}"
  end

  prepared_text
end
end
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@
expect(vector_rep.topic_id_from_representation(expected_embedding_1)).to eq(topic.id)
expect(vector_rep.topic_id_from_representation(expected_embedding_1)).to eq(topic.id)
end

# Regression spec: a record with no content produces blank prepared text;
# gen_bulk_reprensentations must skip it (via the blank? guard) instead of
# raising while trying to generate an embedding for empty input.
it "does nothing if passed record has no content" do
  expect { vector_rep.gen_bulk_reprensentations([Topic.new]) }.not_to raise_error
end
end

describe "#asymmetric_topics_similarity_search" do
Expand Down