@@ -17,14 +17,14 @@ def distance_functions
 
     def tokenizer_names
       [
-        DiscourseAi::Tokenizers::AllMpnetBaseV2Tokenizer,
-        DiscourseAi::Tokenizers::BgeLargeEnTokenizer,
-        DiscourseAi::Tokenizers::BgeM3Tokenizer,
-        DiscourseAi::Tokenizers::GeminiTokenizer,
-        DiscourseAi::Tokenizers::MultilingualE5LargeTokenizer,
-        DiscourseAi::Tokenizers::OpenAiTokenizer,
-        DiscourseAi::Tokenizers::MistralTokenizer,
-        DiscourseAi::Tokenizers::QwenTokenizer,
+        DiscourseAi::Tokenizer::AllMpnetBaseV2Tokenizer,
+        DiscourseAi::Tokenizer::BgeLargeEnTokenizer,
+        DiscourseAi::Tokenizer::BgeM3Tokenizer,
+        DiscourseAi::Tokenizer::GeminiTokenizer,
+        DiscourseAi::Tokenizer::MultilingualE5LargeTokenizer,
+        DiscourseAi::Tokenizer::OpenAiTokenizer,
+        DiscourseAi::Tokenizer::MistralTokenizer,
+        DiscourseAi::Tokenizer::QwenTokenizer,
       ].map(&:name)
     end
 
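For context, a minimal sketch (not part of the commit) of what the renamed tokenizer_names list yields: Class#name turns each constant into its fully qualified string, so after this change the strings read DiscourseAi::Tokenizer::* rather than DiscourseAi::Tokenizers::*. The stub module below is hypothetical and only stands in for the real discourse-ai classes so the sketch runs standalone.

    # Hypothetical stubs standing in for the real discourse-ai tokenizer classes.
    module DiscourseAi
      module Tokenizer
        class OpenAiTokenizer; end
        class BgeM3Tokenizer; end
      end
    end

    # Class#name returns the fully qualified constant name as a string.
    names = [
      DiscourseAi::Tokenizer::OpenAiTokenizer,
      DiscourseAi::Tokenizer::BgeM3Tokenizer,
    ].map(&:name)

    puts names
    # DiscourseAi::Tokenizer::OpenAiTokenizer
    # DiscourseAi::Tokenizer::BgeM3Tokenizer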
@@ -42,7 +42,7 @@ def presets
           dimensions: 1024,
           max_sequence_length: 512,
           pg_function: "<#>",
-          tokenizer_class: "DiscourseAi::Tokenizers::BgeLargeEnTokenizer",
+          tokenizer_class: "DiscourseAi::Tokenizer::BgeLargeEnTokenizer",
           provider: HUGGING_FACE,
           search_prompt: "Represent this sentence for searching relevant passages:",
         },
@@ -52,7 +52,7 @@ def presets
           dimensions: 1024,
           max_sequence_length: 8192,
           pg_function: "<#>",
-          tokenizer_class: "DiscourseAi::Tokenizers::BgeM3Tokenizer",
+          tokenizer_class: "DiscourseAi::Tokenizer::BgeM3Tokenizer",
           provider: HUGGING_FACE,
         },
         {
@@ -63,7 +63,7 @@ def presets
           pg_function: "<=>",
           url:
             "https://generativelanguage.googleapis.com/v1beta/models/embedding-001:embedContent",
-          tokenizer_class: "DiscourseAi::Tokenizers::GeminiTokenizer",
+          tokenizer_class: "DiscourseAi::Tokenizer::GeminiTokenizer",
           provider: GOOGLE,
         },
         {
@@ -72,7 +72,7 @@ def presets
           dimensions: 1024,
           max_sequence_length: 512,
           pg_function: "<=>",
-          tokenizer_class: "DiscourseAi::Tokenizers::MultilingualE5LargeTokenizer",
+          tokenizer_class: "DiscourseAi::Tokenizer::MultilingualE5LargeTokenizer",
           provider: HUGGING_FACE,
         },
         # "text-embedding-3-large" real dimentions are 3072, but we only support up to 2000 in the
@@ -83,7 +83,7 @@ def presets
           dimensions: 2000,
           max_sequence_length: 8191,
           pg_function: "<=>",
-          tokenizer_class: "DiscourseAi::Tokenizers::OpenAiTokenizer",
+          tokenizer_class: "DiscourseAi::Tokenizer::OpenAiTokenizer",
           url: "https://api.openai.com/v1/embeddings",
           provider: OPEN_AI,
           matryoshka_dimensions: true,
@@ -97,7 +97,7 @@ def presets
           dimensions: 1536,
           max_sequence_length: 8191,
           pg_function: "<=>",
-          tokenizer_class: "DiscourseAi::Tokenizers::OpenAiTokenizer",
+          tokenizer_class: "DiscourseAi::Tokenizer::OpenAiTokenizer",
           url: "https://api.openai.com/v1/embeddings",
           provider: OPEN_AI,
           matryoshka_dimensions: true,
@@ -111,7 +111,7 @@ def presets
           dimensions: 1536,
           max_sequence_length: 8191,
           pg_function: "<=>",
-          tokenizer_class: "DiscourseAi::Tokenizers::OpenAiTokenizer",
+          tokenizer_class: "DiscourseAi::Tokenizer::OpenAiTokenizer",
           url: "https://api.openai.com/v1/embeddings",
           provider: OPEN_AI,
           provider_params: {
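The presets store tokenizer_class as a string rather than a constant, which is why every preset literal above is touched as well as the constant list. A hedged, self-contained sketch of how such a string is presumably resolved back to a class; Object.const_get is plain Ruby, and whether the plugin actually uses it (or ActiveSupport's String#constantize) is an assumption, as is the stub module.

    # Hypothetical stub standing in for the real plugin class so this runs standalone.
    module DiscourseAi
      module Tokenizer
        class OpenAiTokenizer; end
      end
    end

    preset = {
      display_name: "text-embedding-3-small",
      tokenizer_class: "DiscourseAi::Tokenizer::OpenAiTokenizer",
    }

    # Resolve the stored string back to the constant. With the old
    # "DiscourseAi::Tokenizers::..." spelling this lookup would raise NameError,
    # which is why the string values are updated along with the constants.
    tokenizer = Object.const_get(preset[:tokenizer_class])
    puts tokenizer # => DiscourseAi::Tokenizer::OpenAiTokenizer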