@@ -84,6 +84,7 @@ def presets
8484 tokenizer_class : "DiscourseAi::Tokenizer::OpenAiTokenizer" ,
8585 url : "https://api.openai.com/v1/embeddings" ,
8686 provider : OPEN_AI ,
87+ matryoshka_dimensions : true ,
8788 provider_params : {
8889 model_name : "text-embedding-3-large" ,
8990 } ,
@@ -97,6 +98,7 @@ def presets
9798 tokenizer_class : "DiscourseAi::Tokenizer::OpenAiTokenizer" ,
9899 url : "https://api.openai.com/v1/embeddings" ,
99100 provider : OPEN_AI ,
101+ matryoshka_dimensions : true ,
100102 provider_params : {
101103 model_name : "text-embedding-3-small" ,
102104 } ,
@@ -200,9 +202,7 @@ def hugging_face_client
200202 end
201203
202204 def open_ai_client
203- model_name = lookup_custom_param ( "model_name" )
204- can_shorten_dimensions = %w[ text-embedding-3-small text-embedding-3-large ] . include? ( model_name )
205- client_dimensions = can_shorten_dimensions ? dimensions : nil
205+ client_dimensions = matryoshka_dimensions ? dimensions : nil
206206
207207 DiscourseAi ::Inference ::OpenAiEmbeddings . new (
208208 endpoint_url ,
@@ -221,20 +221,21 @@ def gemini_client
221221#
222222# Table name: embedding_definitions
223223#
224- # id :bigint not null, primary key
225- # display_name :string not null
226- # dimensions :integer not null
227- # max_sequence_length :integer not null
228- # version :integer default(1), not null
229- # pg_function :string not null
230- # provider :string not null
231- # tokenizer_class :string not null
232- # url :string not null
233- # api_key :string
234- # seeded :boolean default(FALSE), not null
235- # provider_params :jsonb
236- # created_at :datetime not null
237- # updated_at :datetime not null
238- # embed_prompt :string default(""), not null
239- # search_prompt :string default(""), not null
224+ # id :bigint not null, primary key
225+ # display_name :string not null
226+ # dimensions :integer not null
227+ # max_sequence_length :integer not null
228+ # version :integer default(1), not null
229+ # pg_function :string not null
230+ # provider :string not null
231+ # tokenizer_class :string not null
232+ # url :string not null
233+ # api_key :string
234+ # seeded :boolean default(FALSE), not null
235+ # provider_params :jsonb
236+ # created_at :datetime not null
237+ # updated_at :datetime not null
238+ # embed_prompt :string default(""), not null
239+ # search_prompt :string default(""), not null
240+ # matryoshka_dimensions :boolean default(FALSE), not null
240241#
0 commit comments