From 4b7a81543d1d73b8a4458cc4a3c97ec3fc4bc062 Mon Sep 17 00:00:00 2001
From: Roman Rizzi
Date: Mon, 13 Jan 2025 14:20:00 -0300
Subject: [PATCH 1/3] DEV: Embedding tables' model_id has to be a bigint

Create ai_topics_embeddings, ai_posts_embeddings and
ai_document_fragments_embeddings with bigint target and model ids, a
unique index on (model_id, strategy_id, <target id>) and one partial
HNSW search_bit index per listed model for strategy 1. Existing rows
are copied over from the old tables, Schema now points at the new
tables, and a post-migration drops the old ones.

---
 .../20241230153300_new_embeddings_tables.rb   | 85 +++++++++++++++++++
 ...0250113171444_drop_old_embedding_tables.rb | 11 +++
 lib/embeddings/schema.rb                      |  6 +-
 3 files changed, 99 insertions(+), 3 deletions(-)
 create mode 100644 db/migrate/20241230153300_new_embeddings_tables.rb
 create mode 100644 db/post_migrate/20250113171444_drop_old_embedding_tables.rb

diff --git a/db/migrate/20241230153300_new_embeddings_tables.rb b/db/migrate/20241230153300_new_embeddings_tables.rb
new file mode 100644
index 000000000..59ee5e691
--- /dev/null
+++ b/db/migrate/20241230153300_new_embeddings_tables.rb
@@ -0,0 +1,85 @@
+# frozen_string_literal: true
+
+class NewEmbeddingsTables < ActiveRecord::Migration[7.2]
+  def up
+    create_table :ai_topics_embeddings, id: false do |t|
+      t.bigint :topic_id, null: false
+      t.bigint :model_id, null: false
+      t.integer :model_version, null: false
+      t.integer :strategy_id, null: false
+      t.integer :strategy_version, null: false
+      t.text :digest, null: false
+      t.column :embeddings, "halfvec", null: false
+      t.timestamps
+
+      t.index %i[model_id strategy_id topic_id],
+              unique: true,
+              name: "index_ai_topics_embeddings_on_model_strategy_topic"
+    end
+
+    create_table :ai_posts_embeddings, id: false do |t|
+      t.bigint :post_id, null: false
+      t.bigint :model_id, null: false
+      t.integer :model_version, null: false
+      t.integer :strategy_id, null: false
+      t.integer :strategy_version, null: false
+      t.text :digest, null: false
+      t.column :embeddings, "halfvec", null: false
+      t.timestamps
+
+      t.index %i[model_id strategy_id post_id],
+              unique: true,
+              name: "index_ai_posts_embeddings_on_model_strategy_post"
+    end
+
+    create_table :ai_document_fragments_embeddings, id: false do |t|
+      t.bigint :rag_document_fragment_id, null: false
+      t.bigint :model_id, null: false
+      t.integer :model_version, null: false
+      t.integer :strategy_id, null: false
+      t.integer :strategy_version, null: false
+      t.text :digest, null: false
+      t.column :embeddings, "halfvec", null: false
+      t.timestamps
+
+      t.index %i[model_id strategy_id rag_document_fragment_id],
+              unique: true,
+              name: "index_ai_fragments_embeddings_on_model_strategy_fragment"
+    end
+
+    # Copied from 20241008054440_create_binary_indexes_for_embeddings
+    %w[topics posts document_fragments].each do |type|
+      # our supported embeddings models IDs and dimensions
+      [
+        [1, 768],
+        [2, 1536],
+        [3, 1024],
+        [4, 1024],
+        [5, 768],
+        [6, 1536],
+        [7, 2000],
+        [8, 1024],
+      ].each { |model_id, dimensions| execute <<-SQL }
+        CREATE INDEX ai_#{type}_embeddings_#{model_id}_1_search_bit ON ai_#{type}_embeddings
+        USING hnsw ((binary_quantize(embeddings)::bit(#{dimensions})) bit_hamming_ops)
+        WHERE model_id = #{model_id} AND strategy_id = 1;
+      SQL
+    end
+
+    # Copy data from old tables to new tables
+    execute <<-SQL
+      INSERT INTO ai_topics_embeddings (topic_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
+      SELECT * FROM ai_topic_embeddings;
+
+      INSERT INTO ai_posts_embeddings (post_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
+      SELECT * FROM ai_post_embeddings;
+
+      INSERT INTO ai_document_fragments_embeddings (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
+      SELECT * FROM ai_document_fragment_embeddings;
+    SQL
+  end
+
+  def down
+    raise ActiveRecord::IrreversibleMigration
+  end
+end
diff --git a/db/post_migrate/20250113171444_drop_old_embedding_tables.rb b/db/post_migrate/20250113171444_drop_old_embedding_tables.rb
new file mode 100644
index 000000000..421ae14ca
--- /dev/null
+++ b/db/post_migrate/20250113171444_drop_old_embedding_tables.rb
@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+class DropOldEmbeddingTables < ActiveRecord::Migration[7.2]
+  def up
+    drop_table :ai_topic_embeddings
+    drop_table :ai_post_embeddings
+    drop_table :ai_document_fragment_embeddings
+  end
+
+  def down
+  end
+end
diff --git a/lib/embeddings/schema.rb b/lib/embeddings/schema.rb
index 3ae51c7b5..af4785fe1 100644
--- a/lib/embeddings/schema.rb
+++ b/lib/embeddings/schema.rb
@@ -8,9 +8,9 @@
 module DiscourseAi
   module Embeddings
     class Schema
-      TOPICS_TABLE = "ai_topic_embeddings"
-      POSTS_TABLE = "ai_post_embeddings"
-      RAG_DOCS_TABLE = "ai_document_fragment_embeddings"
+      TOPICS_TABLE = "ai_topics_embeddings"
+      POSTS_TABLE = "ai_posts_embeddings"
+      RAG_DOCS_TABLE = "ai_document_fragments_embeddings"
 
       def self.for(
         target_klass,

From e2f439c9e0c08f3fec17167ff7c5758ba7aeff2e Mon Sep 17 00:00:00 2001
From: Roman Rizzi
Date: Mon, 13 Jan 2025 14:27:23 -0300
Subject: [PATCH 2/3] Drop old search_bit indexes

Explicitly drop (IF EXISTS) the per-model search_bit indexes for
models 1-8 on each of the three old embedding tables.

---
 ...0250113171444_drop_old_embedding_tables.rb | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/db/post_migrate/20250113171444_drop_old_embedding_tables.rb b/db/post_migrate/20250113171444_drop_old_embedding_tables.rb
index 421ae14ca..e875427a6 100644
--- a/db/post_migrate/20250113171444_drop_old_embedding_tables.rb
+++ b/db/post_migrate/20250113171444_drop_old_embedding_tables.rb
@@ -4,6 +4,35 @@ def up
     drop_table :ai_topic_embeddings
     drop_table :ai_post_embeddings
     drop_table :ai_document_fragment_embeddings
+
+    execute <<~SQL
+      DROP INDEX IF EXISTS ai_topic_embeddings_1_1_search_bit;
+      DROP INDEX IF EXISTS ai_topic_embeddings_2_1_search_bit;
+      DROP INDEX IF EXISTS ai_topic_embeddings_3_1_search_bit;
+      DROP INDEX IF EXISTS ai_topic_embeddings_4_1_search_bit;
+      DROP INDEX IF EXISTS ai_topic_embeddings_5_1_search_bit;
+      DROP INDEX IF EXISTS ai_topic_embeddings_6_1_search_bit;
+      DROP INDEX IF EXISTS ai_topic_embeddings_7_1_search_bit;
+      DROP INDEX IF EXISTS ai_topic_embeddings_8_1_search_bit;
+
+      DROP INDEX IF EXISTS ai_post_embeddings_1_1_search_bit;
+      DROP INDEX IF EXISTS ai_post_embeddings_2_1_search_bit;
+      DROP INDEX IF EXISTS ai_post_embeddings_3_1_search_bit;
+      DROP INDEX IF EXISTS ai_post_embeddings_4_1_search_bit;
+      DROP INDEX IF EXISTS ai_post_embeddings_5_1_search_bit;
+      DROP INDEX IF EXISTS ai_post_embeddings_6_1_search_bit;
+      DROP INDEX IF EXISTS ai_post_embeddings_7_1_search_bit;
+      DROP INDEX IF EXISTS ai_post_embeddings_8_1_search_bit;
+
+      DROP INDEX IF EXISTS ai_document_fragment_embeddings_1_1_search_bit;
+      DROP INDEX IF EXISTS ai_document_fragment_embeddings_2_1_search_bit;
+      DROP INDEX IF EXISTS ai_document_fragment_embeddings_3_1_search_bit;
+      DROP INDEX IF EXISTS ai_document_fragment_embeddings_4_1_search_bit;
+      DROP INDEX IF EXISTS ai_document_fragment_embeddings_5_1_search_bit;
+      DROP INDEX IF EXISTS ai_document_fragment_embeddings_6_1_search_bit;
+      DROP INDEX IF EXISTS ai_document_fragment_embeddings_7_1_search_bit;
+      DROP INDEX IF EXISTS ai_document_fragment_embeddings_8_1_search_bit;
+    SQL
   end
 
   def down

From 7515956280034fd0dd99c1fa1240b4de34f92e04 Mon Sep 17 00:00:00 2001
From: Roman Rizzi
Date: Mon, 13 Jan 2025 16:49:41 -0300
Subject: [PATCH 3/3] copy rag fragment embeddings created during deploy window

Rows present in ai_document_fragment_embeddings but missing from
ai_document_fragments_embeddings are inserted into the new table
before the old tables are dropped. The anti-join compares the full
unique key of the new table (model_id, strategy_id,
rag_document_fragment_id), so a fragment that already has a row for
one model does not hide rows created for another model or strategy.
The old tables are now dropped last, after the copy and the index
cleanup.

---
Review revision: the original anti-join matched on
rag_document_fragment_id only. Hunk sizes and the diffstat below were
recomputed for this revision; the commit id and blob ids were not
regenerated.

 .../20250113171444_drop_old_embedding_tables.rb | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/db/post_migrate/20250113171444_drop_old_embedding_tables.rb b/db/post_migrate/20250113171444_drop_old_embedding_tables.rb
index e875427a6..c0794142c 100644
--- a/db/post_migrate/20250113171444_drop_old_embedding_tables.rb
+++ b/db/post_migrate/20250113171444_drop_old_embedding_tables.rb
@@ -1,9 +1,22 @@
 # frozen_string_literal: true
 class DropOldEmbeddingTables < ActiveRecord::Migration[7.2]
   def up
-    drop_table :ai_topic_embeddings
-    drop_table :ai_post_embeddings
-    drop_table :ai_document_fragment_embeddings
+    # Copy rag embeddings created during deploy. The anti-join uses the full
+    # unique key of the new table (model_id, strategy_id, rag_document_fragment_id)
+    # so a fragment already copied for one model does not hide rows created for
+    # another model or strategy.
+    execute <<~SQL
+      INSERT INTO ai_document_fragments_embeddings (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
+      (
+        SELECT ai_document_fragment_embeddings.*
+        FROM ai_document_fragment_embeddings
+        LEFT OUTER JOIN ai_document_fragments_embeddings
+          ON ai_document_fragment_embeddings.rag_document_fragment_id = ai_document_fragments_embeddings.rag_document_fragment_id
+          AND ai_document_fragment_embeddings.model_id = ai_document_fragments_embeddings.model_id
+          AND ai_document_fragment_embeddings.strategy_id = ai_document_fragments_embeddings.strategy_id
+        WHERE ai_document_fragments_embeddings.rag_document_fragment_id IS NULL
+      )
+    SQL
 
     execute <<~SQL
       DROP INDEX IF EXISTS ai_topic_embeddings_1_1_search_bit;
@@ -33,6 +46,10 @@ def up
       DROP INDEX IF EXISTS ai_document_fragment_embeddings_7_1_search_bit;
       DROP INDEX IF EXISTS ai_document_fragment_embeddings_8_1_search_bit;
     SQL
+
+    drop_table :ai_topic_embeddings
+    drop_table :ai_post_embeddings
+    drop_table :ai_document_fragment_embeddings
   end
 
   def down