@@ -17,14 +17,14 @@ def distance_functions
 
     def tokenizer_names
       [
-        DiscourseAi::Tokenizer::AllMpnetBaseV2Tokenizer,
-        DiscourseAi::Tokenizer::BgeLargeEnTokenizer,
-        DiscourseAi::Tokenizer::BgeM3Tokenizer,
-        DiscourseAi::Tokenizer::GeminiTokenizer,
-        DiscourseAi::Tokenizer::MultilingualE5LargeTokenizer,
-        DiscourseAi::Tokenizer::OpenAiTokenizer,
-        DiscourseAi::Tokenizer::MixtralTokenizer,
-        DiscourseAi::Tokenizer::QwenTokenizer,
+        DiscourseAi::Tokenizers::AllMpnetBaseV2Tokenizer,
+        DiscourseAi::Tokenizers::BgeLargeEnTokenizer,
+        DiscourseAi::Tokenizers::BgeM3Tokenizer,
+        DiscourseAi::Tokenizers::GeminiTokenizer,
+        DiscourseAi::Tokenizers::MultilingualE5LargeTokenizer,
+        DiscourseAi::Tokenizers::OpenAiTokenizer,
+        DiscourseAi::Tokenizers::MistralTokenizer,
+        DiscourseAi::Tokenizers::QwenTokenizer,
       ].map(&:name)
     end
 
@@ -42,7 +42,7 @@ def presets
               dimensions: 1024,
               max_sequence_length: 512,
               pg_function: "<#>",
-              tokenizer_class: "DiscourseAi::Tokenizer::BgeLargeEnTokenizer",
+              tokenizer_class: "DiscourseAi::Tokenizers::BgeLargeEnTokenizer",
               provider: HUGGING_FACE,
               search_prompt: "Represent this sentence for searching relevant passages:",
             },
@@ -52,7 +52,7 @@ def presets
               dimensions: 1024,
               max_sequence_length: 8192,
               pg_function: "<#>",
-              tokenizer_class: "DiscourseAi::Tokenizer::BgeM3Tokenizer",
+              tokenizer_class: "DiscourseAi::Tokenizers::BgeM3Tokenizer",
               provider: HUGGING_FACE,
             },
             {
@@ -63,7 +63,7 @@ def presets
               pg_function: "<=>",
               url:
                 "https://generativelanguage.googleapis.com/v1beta/models/embedding-001:embedContent",
-              tokenizer_class: "DiscourseAi::Tokenizer::GeminiTokenizer",
+              tokenizer_class: "DiscourseAi::Tokenizers::GeminiTokenizer",
               provider: GOOGLE,
             },
             {
@@ -72,7 +72,7 @@ def presets
               dimensions: 1024,
               max_sequence_length: 512,
               pg_function: "<=>",
-              tokenizer_class: "DiscourseAi::Tokenizer::MultilingualE5LargeTokenizer",
+              tokenizer_class: "DiscourseAi::Tokenizers::MultilingualE5LargeTokenizer",
               provider: HUGGING_FACE,
             },
             # "text-embedding-3-large" real dimentions are 3072, but we only support up to 2000 in the
@@ -83,7 +83,7 @@ def presets
               dimensions: 2000,
               max_sequence_length: 8191,
               pg_function: "<=>",
-              tokenizer_class: "DiscourseAi::Tokenizer::OpenAiTokenizer",
+              tokenizer_class: "DiscourseAi::Tokenizers::OpenAiTokenizer",
               url: "https://api.openai.com/v1/embeddings",
               provider: OPEN_AI,
               matryoshka_dimensions: true,
@@ -97,7 +97,7 @@ def presets
               dimensions: 1536,
               max_sequence_length: 8191,
               pg_function: "<=>",
-              tokenizer_class: "DiscourseAi::Tokenizer::OpenAiTokenizer",
+              tokenizer_class: "DiscourseAi::Tokenizers::OpenAiTokenizer",
               url: "https://api.openai.com/v1/embeddings",
               provider: OPEN_AI,
               matryoshka_dimensions: true,
@@ -111,7 +111,7 @@ def presets
               dimensions: 1536,
               max_sequence_length: 8191,
               pg_function: "<=>",
-              tokenizer_class: "DiscourseAi::Tokenizer::OpenAiTokenizer",
+              tokenizer_class: "DiscourseAi::Tokenizers::OpenAiTokenizer",
               url: "https://api.openai.com/v1/embeddings",
               provider: OPEN_AI,
               provider_params: {