From 7ad6430c117f1679463c120dcf986dcb4d0cece5 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 19 Nov 2025 14:15:28 +0100 Subject: [PATCH 1/3] perf: upgrade privatemode embeddings model --- src/embedders/classification/contextual.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/embedders/classification/contextual.py b/src/embedders/classification/contextual.py index 944e90d..a61d040 100644 --- a/src/embedders/classification/contextual.py +++ b/src/embedders/classification/contextual.py @@ -217,7 +217,7 @@ class PrivatemodeAISentenceEmbedder(SentenceEmbedder): def __init__( self, batch_size: int = 128, - model_name: str = "intfloat/multilingual-e5-large-instruct", + model_name: str = "qwen3-embedding-4b", ): """ Embeds documents using privatemode ai proxy via OpenAI classes. @@ -225,7 +225,7 @@ def __init__( Args: batch_size (int, optional): Defines the number of conversions after which the embedder yields. Defaults to 128. - model_name (str, optional): Name of the embedding model from Privatemode AI (e.g. intfloat/multilingual-e5-large-instruct). Defaults to "intfloat/multilingual-e5-large-instruct". + model_name (str, optional): Name of the embedding model from Privatemode AI (e.g. qwen3-embedding-4b). Defaults to "qwen3-embedding-4b". Raises: Exception: If you use Azure, you need to provide api_type, api_version and api_base. 
From a6caf1befc92707a6fc28b09796d23dbcf97b23b Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 20 Nov 2025 08:32:03 +0100 Subject: [PATCH 2/3] perf: increase trim length for privatemode --- src/embedders/classification/contextual.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/embedders/classification/contextual.py b/src/embedders/classification/contextual.py index a61d040..e2baa7c 100644 --- a/src/embedders/classification/contextual.py +++ b/src/embedders/classification/contextual.py @@ -278,7 +278,7 @@ def dump(self, project_id: str, embedding_id: str) -> None: export_file.parent.mkdir(parents=True, exist_ok=True) util.write_json(self.to_json(), export_file, indent=2) - def _trim_length(self, text: str, max_length: int = 512) -> str: + def _trim_length(self, text: str, max_length: int = 32000) -> str: tokens = self._auto_tokenizer( text, truncation=True, From c196685276570de0f8633843ba927d7db1f2a3b2 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 20 Nov 2025 08:59:30 +0100 Subject: [PATCH 3/3] perf: update auto_tokenizer for privatemode --- src/embedders/classification/contextual.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/embedders/classification/contextual.py b/src/embedders/classification/contextual.py index e2baa7c..5108bdd 100644 --- a/src/embedders/classification/contextual.py +++ b/src/embedders/classification/contextual.py @@ -218,6 +218,7 @@ def __init__( self, batch_size: int = 128, model_name: str = "qwen3-embedding-4b", + hf_model_name: str = "boboliu/Qwen3-Embedding-4B-W4A16-G128", ): """ Embeds documents using privatemode ai proxy via OpenAI classes. 
@@ -238,8 +239,8 @@ def __init__( api_key="dummy", # Set in proxy base_url=PRIVATEMODE_AI_URL, ) - # for trimming the length of the text if > 512 tokens - self._auto_tokenizer = AutoTokenizer.from_pretrained(self.model_name) + # for trimming the length of the text if > 32000 tokens + self._auto_tokenizer = AutoTokenizer.from_pretrained(hf_model_name) def _encode( self, documents: List[Union[str, Doc]], fit_model: bool