@@ -29,6 +29,91 @@ def tokenizer_names
# Returns the extra, provider-specific parameters keyed by provider.
# Only :open_ai has an entry here: a :model_name parameter tagged :text
# (presumably the field type used when rendering/validating it — confirm
# against the caller).
def provider_params
  open_ai_params = { model_name: :text }

  { open_ai: open_ai_params }
end
32+
# Returns the list of built-in embedding presets.
#
# Every preset is a Hash carrying :preset_id, :display_name, :dimensions,
# :max_sequence_length, :pg_function, :tokenizer_class and :provider.
# The Google and OpenAI presets additionally carry :url, and the OpenAI
# presets carry :provider_params with the :model_name to request.
#
# The Array is built on first call and cached in @presets, so all callers
# share the same Array (and the same Hash instances inside it).
def presets
  return @presets if @presets

  @presets = [
    # --- Hugging Face: bge family ---
    {
      preset_id: "bge-large-en",
      display_name: "bge-large-en",
      dimensions: 1024,
      max_sequence_length: 512,
      # NOTE(review): "<#>" / "<=>" look like pgvector distance operators — confirm.
      pg_function: "<#>",
      tokenizer_class: "DiscourseAi::Tokenizer::BgeLargeEnTokenizer",
      provider: HUGGING_FACE,
    },
    {
      preset_id: "bge-m3",
      display_name: "bge-m3",
      dimensions: 1024,
      max_sequence_length: 8192,
      pg_function: "<#>",
      tokenizer_class: "DiscourseAi::Tokenizer::BgeM3Tokenizer",
      provider: HUGGING_FACE,
    },
    # --- Google ---
    {
      preset_id: "gemini-embedding-001",
      display_name: "Gemini's embedding-001",
      dimensions: 768,
      max_sequence_length: 1536,
      pg_function: "<=>",
      url: "https://generativelanguage.googleapis.com/v1beta/models/embedding-001:embedContent",
      tokenizer_class: "DiscourseAi::Tokenizer::OpenAiTokenizer",
      provider: GOOGLE,
    },
    # --- Hugging Face: multilingual ---
    {
      preset_id: "multilingual-e5-large",
      display_name: "multilingual-e5-large",
      dimensions: 1024,
      max_sequence_length: 512,
      pg_function: "<=>",
      tokenizer_class: "DiscourseAi::Tokenizer::MultilingualE5LargeTokenizer",
      provider: HUGGING_FACE,
    },
    # --- OpenAI (all share the same endpoint and tokenizer) ---
    {
      preset_id: "text-embedding-3-large",
      display_name: "OpenAI's text-embedding-3-large",
      dimensions: 2000,
      max_sequence_length: 8191,
      pg_function: "<=>",
      tokenizer_class: "DiscourseAi::Tokenizer::OpenAiTokenizer",
      url: "https://api.openai.com/v1/embeddings",
      provider: OPEN_AI,
      provider_params: {
        model_name: "text-embedding-3-large",
      },
    },
    {
      preset_id: "text-embedding-3-small",
      display_name: "OpenAI's text-embedding-3-small",
      dimensions: 1536,
      max_sequence_length: 8191,
      pg_function: "<=>",
      tokenizer_class: "DiscourseAi::Tokenizer::OpenAiTokenizer",
      url: "https://api.openai.com/v1/embeddings",
      provider: OPEN_AI,
      provider_params: {
        model_name: "text-embedding-3-small",
      },
    },
    {
      preset_id: "text-embedding-ada-002",
      display_name: "OpenAI's text-embedding-ada-002",
      dimensions: 1536,
      max_sequence_length: 8191,
      pg_function: "<=>",
      tokenizer_class: "DiscourseAi::Tokenizer::OpenAiTokenizer",
      url: "https://api.openai.com/v1/embeddings",
      provider: OPEN_AI,
      provider_params: {
        model_name: "text-embedding-ada-002",
      },
    },
  ]
end
32117 end
33118
# A provider must be present and must be one of the values returned by
# provider_names (evaluated once, when the class body is loaded).
validates :provider, presence: true, inclusion: { in: provider_names }
0 commit comments