Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit c4d2b7d

Browse files
authored
PERF: Optimize backfill query to prevent statement timeouts (#1066)
1 parent 6721c67 commit c4d2b7d

File tree

2 files changed

+21 -17 lines changed

2 files changed

+21 -17 lines changed

db/migrate/20250114160417_backfill_topic_embeddings.rb

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -6,16 +6,18 @@ def up
66
loop do
77
count = execute(<<~SQL).cmd_tuples
88
INSERT INTO ai_topics_embeddings (topic_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
9-
SELECT source.*
10-
FROM ai_topic_embeddings source
11-
WHERE NOT EXISTS (
12-
SELECT 1
13-
FROM ai_topics_embeddings target
14-
WHERE target.model_id = source.model_id
15-
AND target.strategy_id = source.strategy_id
16-
AND target.topic_id = source.topic_id
9+
SELECT source.*
10+
FROM (
11+
SELECT old_table.*
12+
FROM ai_topic_embeddings old_table
13+
LEFT JOIN ai_topics_embeddings target ON (
14+
target.model_id = old_table.model_id AND
15+
target.strategy_id = old_table.strategy_id AND
16+
target.topic_id = old_table.topic_id
1717
)
18+
WHERE target.topic_id IS NULL
1819
LIMIT 10000
20+
) source
1921
SQL
2022

2123
break if count == 0

db/migrate/20250114160446_backfill_post_embeddings.rb

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -7,17 +7,19 @@ def up
77

88
loop do
99
count = execute(<<~SQL).cmd_tuples
10-
INSERT INTO ai_posts_embeddings (post_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
11-
SELECT source.*
12-
FROM ai_post_embeddings source
13-
WHERE NOT EXISTS (
14-
SELECT 1
15-
FROM ai_posts_embeddings target
16-
WHERE target.model_id = source.model_id
17-
AND target.strategy_id = source.strategy_id
18-
AND target.post_id = source.post_id
10+
INSERT INTO ai_posts_embeddings (post_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
11+
SELECT source.*
12+
FROM (
13+
SELECT old_table.*
14+
FROM ai_post_embeddings old_table
15+
LEFT JOIN ai_posts_embeddings target ON (
16+
target.model_id = old_table.model_id AND
17+
target.strategy_id = old_table.strategy_id AND
18+
target.post_id = old_table.post_id
1919
)
20+
WHERE target.post_id IS NULL
2021
LIMIT 10000
22+
) source
2123
SQL
2224

2325
break if count == 0

0 commit comments

Comments
 (0)