Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 65bbcd7

Browse files
authored
DEV: Embedding tables' model_id has to be a bigint (#1058)
* DEV: Embedding tables' model_id has to be a bigint
* Drop old search_bit indexes
* Copy rag fragment embeddings created during deploy window
1 parent d07cf51 commit 65bbcd7

File tree

3 files changed

+139
-3
lines changed

3 files changed

+139
-3
lines changed
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# frozen_string_literal: true
2+
3+
# Creates the pluralised embeddings tables (ai_topics_embeddings,
# ai_posts_embeddings, ai_document_fragments_embeddings) whose target id and
# model_id columns are bigints, builds the binary-quantized search indexes on
# them, and copies every row over from the old singular-named tables.
class NewEmbeddingsTables < ActiveRecord::Migration[7.2]
  def up
    create_embeddings_table(
      :ai_topics_embeddings,
      :topic_id,
      "index_ai_topics_embeddings_on_model_strategy_topic",
    )
    create_embeddings_table(
      :ai_posts_embeddings,
      :post_id,
      "index_ai_posts_embeddings_on_model_strategy_post",
    )
    create_embeddings_table(
      :ai_document_fragments_embeddings,
      :rag_document_fragment_id,
      "index_ai_fragments_embeddings_on_model_strategy_fragment",
    )

    create_search_bit_indexes
    copy_existing_embeddings
  end

  def down
    raise ActiveRecord::IrreversibleMigration
  end

  private

  # Creates one embeddings table without a surrogate primary key.
  #
  # table_name    - name of the table to create.
  # target_column - bigint column referencing the embedded record.
  # index_name    - explicit name for the unique
  #                 (model_id, strategy_id, target_column) index.
  def create_embeddings_table(table_name, target_column, index_name)
    create_table table_name, id: false do |t|
      t.bigint target_column, null: false
      t.bigint :model_id, null: false
      t.integer :model_version, null: false
      t.integer :strategy_id, null: false
      t.integer :strategy_version, null: false
      t.text :digest, null: false
      t.column :embeddings, "halfvec", null: false
      t.timestamps

      t.index [:model_id, :strategy_id, target_column], unique: true, name: index_name
    end
  end

  # Builds one partial hnsw index per (table, model) pair, restricted to
  # strategy_id = 1.
  #
  # Copied from 20241008054440_create_binary_indexes_for_embeddings
  def create_search_bit_indexes
    # our supported embeddings models IDs and dimensions
    dimensions_by_model_id = {
      1 => 768,
      2 => 1536,
      3 => 1024,
      4 => 1024,
      5 => 768,
      6 => 1536,
      7 => 2000,
      8 => 1024,
    }

    %w[topics posts document_fragments].each do |type|
      dimensions_by_model_id.each do |model_id, dimensions|
        execute <<~SQL
          CREATE INDEX ai_#{type}_embeddings_#{model_id}_1_search_bit ON ai_#{type}_embeddings
          USING hnsw ((binary_quantize(embeddings)::bit(#{dimensions})) bit_hamming_ops)
          WHERE model_id = #{model_id} AND strategy_id = 1;
        SQL
      end
    end
  end

  # Copy data from old tables to new tables.
  #
  # NOTE(review): SELECT * relies on the old tables exposing their columns in
  # exactly the order listed in each INSERT; confirm against the migration
  # that created them.
  def copy_existing_embeddings
    execute <<~SQL
      INSERT INTO ai_topics_embeddings (topic_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
      SELECT * FROM ai_topic_embeddings;

      INSERT INTO ai_posts_embeddings (post_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
      SELECT * FROM ai_post_embeddings;

      INSERT INTO ai_document_fragments_embeddings (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
      SELECT * FROM ai_document_fragment_embeddings;
    SQL
  end
end
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# frozen_string_literal: true
2+
# Removes the old singular-named embeddings tables (and their search_bit
# indexes) after first copying over any RAG fragment embeddings that are
# present in the old table but missing from ai_document_fragments_embeddings.
class DropOldEmbeddingTables < ActiveRecord::Migration[7.2]
  # Prefixes of the old tables' index names: ai_<type>_embeddings_<model>_1_search_bit
  OLD_TABLE_TYPES = %w[topic post document_fragment].freeze
  # Model IDs for which search_bit indexes were created on the old tables.
  INDEXED_MODEL_IDS = (1..8).freeze

  def up
    # Copy rag embeddings created during deploy.
    #
    # The anti-join matches on the new table's full unique key
    # (model_id, strategy_id, rag_document_fragment_id). Matching on the
    # fragment id alone would skip a row whenever the same fragment already
    # has an embedding for a different model or strategy, and that row would
    # then be lost when the old table is dropped below.
    execute <<~SQL
      INSERT INTO ai_document_fragments_embeddings (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
      (
        SELECT ai_document_fragment_embeddings.*
        FROM ai_document_fragment_embeddings
        LEFT OUTER JOIN ai_document_fragments_embeddings
          ON ai_document_fragment_embeddings.rag_document_fragment_id = ai_document_fragments_embeddings.rag_document_fragment_id
          AND ai_document_fragment_embeddings.model_id = ai_document_fragments_embeddings.model_id
          AND ai_document_fragment_embeddings.strategy_id = ai_document_fragments_embeddings.strategy_id
        WHERE ai_document_fragments_embeddings.rag_document_fragment_id IS NULL
      )
    SQL

    # Drop the old search_bit indexes, one statement per (table, model) pair.
    drop_statements =
      OLD_TABLE_TYPES.flat_map do |type|
        INDEXED_MODEL_IDS.map do |model_id|
          "DROP INDEX IF EXISTS ai_#{type}_embeddings_#{model_id}_1_search_bit;"
        end
      end
    execute drop_statements.join("\n")

    drop_table :ai_topic_embeddings
    drop_table :ai_post_embeddings
    drop_table :ai_document_fragment_embeddings
  end

  # The dropped tables and their data cannot be restored, so refuse to roll
  # back instead of silently reporting success.
  def down
    raise ActiveRecord::IrreversibleMigration
  end
end

lib/embeddings/schema.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
module DiscourseAi
99
module Embeddings
1010
class Schema
11-
TOPICS_TABLE = "ai_topic_embeddings"
12-
POSTS_TABLE = "ai_post_embeddings"
13-
RAG_DOCS_TABLE = "ai_document_fragment_embeddings"
11+
TOPICS_TABLE = "ai_topics_embeddings"
12+
POSTS_TABLE = "ai_posts_embeddings"
13+
RAG_DOCS_TABLE = "ai_document_fragments_embeddings"
1414

1515
def self.for(
1616
target_klass,

0 commit comments

Comments
 (0)