Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit f5cf101

Browse files
authored
FEATURE: configurable embeddings (#1049)
* Use AR model for embeddings features
* endpoints
* Embeddings CRUD UI
* Add presets. Hide a couple more settings
* system specs
* Seed embedding definition from old settings
* Generate search bit index on the fly. cleanup orphaned data
* support for seeded models
* Fix run test for new embedding
* fix selected model not set correctly
1 parent fad4b65 commit f5cf101

File tree

78 files changed

+2124
-1001
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

78 files changed

+2124
-1001
lines changed
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import DiscourseRoute from "discourse/routes/discourse";
2+
3+
export default class AdminPluginsShowDiscourseAiEmbeddingsEdit extends DiscourseRoute {
  /**
   * Picks the embedding to edit out of the list already loaded by the
   * "adminPlugins.show.discourse-ai-embeddings" route.
   *
   * @param {Object} params - route params; `params.id` is parsed as a base-10 integer
   * @returns {Promise<Object|undefined>} the matching record, with
   *   `provider_params` defaulted to `{}`, or `undefined` when no record in
   *   the list has that id
   */
  async model(params) {
    const allEmbeddings = this.modelFor(
      "adminPlugins.show.discourse-ai-embeddings"
    );
    const id = parseInt(params.id, 10);
    const record = allEmbeddings.findBy("id", id);

    // An unknown or non-numeric id (parseInt gives NaN) matches nothing.
    // Writing to `record.provider_params` would then throw a TypeError in
    // the model hook, so bail out with no model instead.
    // NOTE(review): the template then receives no current embedding —
    // presumably it falls back to the plain list; confirm.
    if (!record) {
      return undefined;
    }

    record.provider_params = record.provider_params || {};
    return record;
  }

  /**
   * Runs the default controller setup, then exposes the full embeddings list
   * (the parent route's model) to the controller as `allEmbeddings`.
   */
  setupController(controller, model) {
    super.setupController(controller, model);
    controller.set(
      "allEmbeddings",
      this.modelFor("adminPlugins.show.discourse-ai-embeddings")
    );
  }
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import DiscourseRoute from "discourse/routes/discourse";
2+
3+
export default class AdminPluginsShowDiscourseAiEmbeddingsNew extends DiscourseRoute {
  /**
   * Creates a fresh "ai-embedding" store record and starts it off with an
   * empty `provider_params` object.
   */
  async model() {
    return Object.assign(this.store.createRecord("ai-embedding"), {
      provider_params: {},
    });
  }

  /**
   * Runs the default controller setup, then hands the controller the model of
   * the "adminPlugins.show.discourse-ai-embeddings" route as `allEmbeddings`.
   */
  setupController(controller, model) {
    super.setupController(controller, model);

    const allEmbeddings = this.modelFor(
      "adminPlugins.show.discourse-ai-embeddings"
    );
    controller.set("allEmbeddings", allEmbeddings);
  }
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import DiscourseRoute from "discourse/routes/discourse";
2+
3+
export default class DiscourseAiAiEmbeddingsRoute extends DiscourseRoute {
  /**
   * Asks the store for every "ai-embedding" record and returns whatever
   * `findAll` yields as this route's model.
   */
  model() {
    const embeddings = this.store.findAll("ai-embedding");
    return embeddings;
  }
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<AiEmbeddingsListEditor
2+
@embeddings={{this.allEmbeddings}}
3+
@currentEmbedding={{this.model}}
4+
/>
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<AiEmbeddingsListEditor @embeddings={{this.model}} />
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<AiEmbeddingsListEditor
2+
@embeddings={{this.allEmbeddings}}
3+
@currentEmbedding={{this.model}}
4+
/>
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
# frozen_string_literal: true
2+
3+
module DiscourseAi
  module Admin
    # JSON endpoints for listing, creating, updating, deleting and test-running
    # EmbeddingDefinition records. Inherits from ::Admin::AdminController.
    class AiEmbeddingsController < ::Admin::AdminController
      requires_plugin ::DiscourseAi::PLUGIN_NAME

      # Renders all embedding definitions ordered by display name, together
      # with a `meta` hash of provider params, provider names, distance
      # functions, tokenizers and presets read from EmbeddingDefinition.
      def index
        serialized_defs =
          ActiveModel::ArraySerializer.new(
            EmbeddingDefinition.all.order(:display_name),
            each_serializer: AiEmbeddingDefinitionSerializer,
            root: false,
          ).as_json

        meta = {
          provider_params: EmbeddingDefinition.provider_params,
          providers: EmbeddingDefinition.provider_names,
          distance_functions: EmbeddingDefinition.distance_functions,
          # Each tokenizer is shown under the last segment of its class name.
          tokenizers:
            EmbeddingDefinition.tokenizer_names.map { |tokenizer_name|
              { id: tokenizer_name, name: tokenizer_name.split("::").last }
            },
          presets: EmbeddingDefinition.presets,
        }

        render json: { ai_embeddings: serialized_defs, meta: meta }
      end

      # Intentionally empty action.
      def new
      end

      # Renders the serialized definition identified by params[:id].
      def edit
        render json: AiEmbeddingDefinitionSerializer.new(EmbeddingDefinition.find(params[:id]))
      end

      # Builds a definition from the permitted params; responds 201 with the
      # serialized record on success, or with the record's errors otherwise.
      def create
        embedding_def = EmbeddingDefinition.new(ai_embeddings_params)
        return render_json_error(embedding_def) unless embedding_def.save

        render json: AiEmbeddingDefinitionSerializer.new(embedding_def), status: :created
      end

      # Updates an existing definition. Seeded definitions are rejected with a
      # 403, and :dimensions is dropped from the submitted attributes.
      def update
        embedding_def = EmbeddingDefinition.find(params[:id])
        return render_cannot_edit_builtin if embedding_def.seeded?

        unless embedding_def.update(ai_embeddings_params.except(:dimensions))
          return render_json_error(embedding_def)
        end

        render json: AiEmbeddingDefinitionSerializer.new(embedding_def)
      end

      # Deletes a definition. Responds 403 for seeded definitions and 409 when
      # the definition's id equals the ai_embeddings_selected_model setting.
      def destroy
        embedding_def = EmbeddingDefinition.find(params[:id])
        return render_cannot_edit_builtin if embedding_def.seeded?

        selected_id = SiteSetting.ai_embeddings_selected_model.to_i
        if embedding_def.id == selected_id
          return render_json_error(I18n.t("discourse_ai.embeddings.delete_failed"), status: 409)
        end

        return render_json_error(embedding_def) unless embedding_def.destroy

        head :no_content
      end

      # Builds an unsaved definition from the submitted params and requests a
      # vector for a fixed sample string. Net::HTTPBadResponse is reported in
      # the JSON body as a failure rather than raised.
      def test
        # NOTE(review): presumably 3 attempts per minute per user — confirm
        # against RateLimiter's signature.
        limiter =
          RateLimiter.new(current_user, "ai_embeddings_test_#{current_user.id}", 3, 1.minute)
        limiter.performed!

        candidate = EmbeddingDefinition.new(ai_embeddings_params)
        DiscourseAi::Embeddings::Vector.new(candidate).vector_from("this is a test")

        render json: { success: true }
      rescue Net::HTTPBadResponse => e
        render json: { success: false, error: e.message }
      end

      private

      # Shared 403 response used when a seeded definition is being modified.
      def render_cannot_edit_builtin
        render_json_error(I18n.t("discourse_ai.embeddings.cannot_edit_builtin"), status: 403)
      end

      # Permits the base attributes of :ai_embedding and, when the chosen
      # provider declares extra fields in EmbeddingDefinition.provider_params,
      # also admits those fields (and only those) under :provider_params.
      def ai_embeddings_params
        base_attrs =
          params.require(:ai_embedding).permit(
            *%i[
              display_name
              dimensions
              max_sequence_length
              pg_function
              provider
              url
              api_key
              tokenizer_class
            ],
          )

        provider_fields = EmbeddingDefinition.provider_params.dig(base_attrs[:provider]&.to_sym)
        return base_attrs if provider_fields.blank?

        submitted = params.dig(:ai_embedding, :provider_params)&.slice(*provider_fields.keys)
        base_attrs[:provider_params] = submitted.permit! if submitted.present?

        base_attrs
      end
    end
  end
end

app/jobs/regular/digest_rag_upload.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def execute(args)
1818
target = target_type.constantize.find_by(id: target_id)
1919
return if !target
2020

21-
vector_rep = DiscourseAi::Embeddings::VectorRepresentations::Base.current_representation
21+
vector_rep = DiscourseAi::Embeddings::Vector.instance
2222

2323
tokenizer = vector_rep.tokenizer
2424
chunk_tokens = target.rag_chunk_tokens
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# frozen_string_literal: true
2+
3+
module ::Jobs
  # Prepares the search indexes for one embedding definition when the schema
  # reports that it is not yet correctly indexed.
  class ManageEmbeddingDefSearchIndex < ::Jobs::Base
    # @param args [Hash] job arguments; args[:id] is the EmbeddingDefinition id
    def execute(args)
      definition = EmbeddingDefinition.find_by(id: args[:id])
      # Nothing to do if the definition no longer exists.
      return unless definition

      schema = DiscourseAi::Embeddings::Schema
      schema.prepare_search_indexes(definition) unless schema.correctly_indexed?(definition)
    end
  end
end
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# frozen_string_literal: true
2+
3+
module Jobs
  # Weekly scheduled job that delegates to the embeddings schema to remove
  # orphaned data.
  class RemoveOrphanedEmbeddings < ::Jobs::Scheduled
    every 1.week

    # Job arguments are ignored.
    def execute(_args)
      schema = DiscourseAi::Embeddings::Schema
      schema.remove_orphaned_data
    end
  end
end

0 commit comments

Comments
 (0)