Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 1a8b551

Browse files
committed
FIX: Do batches for backfilling huge embeddings tables
1 parent 356ea77 commit 1a8b551

File tree

2 files changed: +34 -12 lines changed

db/migrate/20250114160417_backfill_topic_embeddings.rb

Lines changed: 17 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,24 @@
1 1
# frozen_string_literal: true
2 2
class BackfillTopicEmbeddings < ActiveRecord::Migration[7.2]
3-
def up
4-
not_backfilled = DB.query_single("SELECT COUNT(*) FROM ai_topics_embeddings").first.to_i == 0
3+
disable_ddl_transaction!
5 4

6-
if not_backfilled
7-
# Copy data from old tables to new tables
8-
execute <<~SQL
9-
INSERT INTO ai_topics_embeddings (topic_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
10-
SELECT * FROM ai_topic_embeddings;
5+
def up
6+
loop do
7+
count = execute(<<~SQL).cmd_tuples
8+
INSERT INTO ai_topics_embeddings (topic_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
9+
SELECT source.*
10+
FROM ai_topic_embeddings source
11+
WHERE NOT EXISTS (
12+
SELECT 1
13+
FROM ai_topics_embeddings target
14+
WHERE target.model_id = source.model_id
15+
AND target.strategy_id = source.strategy_id
16+
AND target.topic_id = source.topic_id
17+
)
18+
LIMIT 10000
11 19
SQL
20+
21+
break if count == 0
12 22
end
13 23
end
14 24

db/migrate/20250114160446_backfill_post_embeddings.rb

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,26 @@
1 1
# frozen_string_literal: true
2 2
class BackfillPostEmbeddings < ActiveRecord::Migration[7.2]
3+
disable_ddl_transaction!
4+
3 5
def up
4-
not_backfilled = DB.query_single("SELECT COUNT(*) FROM ai_posts_embeddings").first.to_i == 0
6+
# Copy data from old tables to new tables in batches.
5 7

6-
if not_backfilled
7-
# Copy data from old tables to new tables
8-
execute <<~SQL
8+
loop do
9+
count = execute(<<~SQL).cmd_tuples
9 10
INSERT INTO ai_posts_embeddings (post_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
10-
SELECT * FROM ai_post_embeddings;
11+
SELECT source.*
12+
FROM ai_post_embeddings source
13+
WHERE NOT EXISTS (
14+
SELECT 1
15+
FROM ai_posts_embeddings target
16+
WHERE target.model_id = source.model_id
17+
AND target.strategy_id = source.strategy_id
18+
AND target.post_id = source.post_id
19+
)
20+
LIMIT 10000
11 21
SQL
22+
23+
break if count == 0
12 24
end
13 25
end
14 26

0 commit comments

Comments
 (0)