Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 45e1b0f

Browse files
committed
FIX: Fix embeddings to use the old OpenAI tokenizer
1 parent 1b16fc8 commit 45e1b0f

File tree

3 files changed: 18 additions (+18) and 4 deletions (-4)

app/models/embedding_definition.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def presets
8484
dimensions: 2000,
8585
max_sequence_length: 8191,
8686
pg_function: "<=>",
87-
tokenizer_class: "DiscourseAi::Tokenizer::OpenAiTokenizer",
87+
tokenizer_class: "DiscourseAi::Tokenizer::OpenAiCl100kTokenizer",
8888
url: "https://api.openai.com/v1/embeddings",
8989
provider: OPEN_AI,
9090
matryoshka_dimensions: true,
@@ -98,7 +98,7 @@ def presets
9898
dimensions: 1536,
9999
max_sequence_length: 8191,
100100
pg_function: "<=>",
101-
tokenizer_class: "DiscourseAi::Tokenizer::OpenAiTokenizer",
101+
tokenizer_class: "DiscourseAi::Tokenizer::OpenAiCl100kTokenizer",
102102
url: "https://api.openai.com/v1/embeddings",
103103
provider: OPEN_AI,
104104
matryoshka_dimensions: true,
@@ -112,7 +112,7 @@ def presets
112112
dimensions: 1536,
113113
max_sequence_length: 8191,
114114
pg_function: "<=>",
115-
tokenizer_class: "DiscourseAi::Tokenizer::OpenAiTokenizer",
115+
tokenizer_class: "DiscourseAi::Tokenizer::OpenAiCl100kTokenizer",
116116
url: "https://api.openai.com/v1/embeddings",
117117
provider: OPEN_AI,
118118
provider_params: {
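(New file — its path was not captured in this extract; it adds the UpdateOpenAiEmbeddingsTokenizer migration shown below.)

Lines changed: 14 additions & 0 deletions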
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# frozen_string_literal: true
2+
class UpdateOpenAiEmbeddingsTokenizer < ActiveRecord::Migration[7.2]
3+
def up
4+
execute <<~SQL
5+
UPDATE embedding_definitions
6+
SET tokenizer_class = 'DiscourseAi::Tokenizer::OpenAiCl100kTokenizer'
7+
WHERE url LIKE '%https://api.openai.com/%' AND tokenizer_class <> 'DiscourseAi::Tokenizer::OpenAiCl100kTokenizer'
8+
SQL
9+
end
10+
11+
def down
12+
raise ActiveRecord::IrreversibleMigration
13+
end
14+
end

spec/system/embeddings/ai_embedding_definition_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
form.field("provider").select(EmbeddingDefinition::OPEN_AI)
5151
form.field("url").fill_in("https://api.openai.com/v1/embeddings")
5252
form.field("api_key").fill_in(api_key)
53-
form.field("tokenizer_class").select("DiscourseAi::Tokenizer::OpenAiTokenizer")
53+
form.field("tokenizer_class").select("DiscourseAi::Tokenizer::OpenAiCl100kTokenizer")
5454

5555
embed_prefix = "On creation:"
5656
search_prefix = "On search:"

0 commit comments

Comments
 (0)