Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions config/settings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ discourse_ai:
- text-embedding-3-small
- text-embedding-3-large
- multilingual-e5-large
- mxbai-embed-xsmall-v1
- bge-large-en
- gemini
- bge-m3
Expand Down
1 change: 1 addition & 0 deletions lib/embeddings/vector_representations/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def find_representation(model_name)
DiscourseAi::Embeddings::VectorRepresentations::BgeM3,
DiscourseAi::Embeddings::VectorRepresentations::Gemini,
DiscourseAi::Embeddings::VectorRepresentations::MultilingualE5Large,
DiscourseAi::Embeddings::VectorRepresentations::MxbaiEmbedXsmallV1,
DiscourseAi::Embeddings::VectorRepresentations::TextEmbedding3Large,
DiscourseAi::Embeddings::VectorRepresentations::TextEmbedding3Small,
DiscourseAi::Embeddings::VectorRepresentations::TextEmbeddingAda002,
Expand Down
64 changes: 64 additions & 0 deletions lib/embeddings/vector_representations/mxbai_embed_xsmall_v1.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# frozen_string_literal: true

module DiscourseAi
module Embeddings
module VectorRepresentations
# Vector representation for the "mxbai-embed-xsmall-v1" embedding model.
#
# Class-level methods report the model name and the site settings the
# representation relies on; instance methods expose the model's fixed
# parameters and forward embedding requests to the inference client.
class MxbaiEmbedXsmallV1 < Base
  # Model name string. Also handed to the inference client (see
  # #inference_client), because `self.class.name` resolves to this override.
  def self.name
    "mxbai-embed-xsmall-v1"
  end

  # True when either the SRV endpoint setting or the plain endpoint setting
  # is present.
  def self.correctly_configured?
    return true if SiteSetting.ai_embeddings_discourse_service_api_endpoint_srv.present?

    SiteSetting.ai_embeddings_discourse_service_api_endpoint.present?
  end

  # Names of the site settings this representation depends on.
  def self.dependant_setting_names
    %w[
      ai_embeddings_discourse_service_api_key
      ai_embeddings_discourse_service_api_endpoint_srv
      ai_embeddings_discourse_service_api_endpoint
    ]
  end

  # NOTE(review): presumably this must be unique among the representations
  # registered in Base — confirm that 1 is not already used by another class.
  def id
    1
  end

  def version
    1
  end

  def dimensions
    384
  end

  def max_sequence_length
    512
  end

  # Operator string; in pgvector `<#>` is the (negative) inner product operator.
  def pg_function
    "<#>"
  end

  # Operator class name; in pgvector this is the inner-product opclass for
  # halfvec columns, matching the operator returned by #pg_function.
  def pg_index_type
    "halfvec_ip_ops"
  end

  # Tokenizer class paired with this model.
  def tokenizer
    DiscourseAi::Tokenizer::MxbaiEmbedXsmallV1Tokenizer
  end

  # Client used to request embeddings, looked up by this class's model name.
  def inference_client
    model_name = self.class.name
    DiscourseAi::Inference::DiscourseClassifier.instance(model_name)
  end

  # Returns whatever the inference client's `perform!` returns for `text`.
  # The `asymetric` keyword is accepted but not used by this implementation.
  def vector_from(text, asymetric: false)
    client = inference_client
    client.perform!(text)
  end
end
end
end
end
12 changes: 12 additions & 0 deletions lib/tokenizer/mxbai_embed_xsmall_v1_tokenizer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# frozen_string_literal: true

module DiscourseAi
module Tokenizer
# Tokenizer for the "mxbai-embed-xsmall-v1" embedding model.
class MxbaiEmbedXsmallV1Tokenizer < BasicTokenizer
  # Returns the tokenizer built from the bundled JSON definition, loading it
  # on first call and reusing the same object afterwards.
  #
  # The object is memoized in a class-level instance variable rather than a
  # class variable (@@tokenizer). Class variables are shared along the
  # inheritance chain, so a definition on an ancestor would either be reused
  # here (yielding the wrong tokenizer) or raise on Ruby 3.0+ when it
  # "overtakes" this one. A class-level instance variable belongs to this
  # class alone.
  #
  # NOTE(review): the path is relative to the process working directory —
  # confirm callers always run from the application root.
  def self.tokenizer
    @tokenizer ||=
      Tokenizers.from_file("./plugins/discourse-ai/tokenizers/mxbai-embed-xsmall-v1.json")
  end
end
end
end
Loading