From 7ad6430c117f1679463c120dcf986dcb4d0cece5 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 19 Nov 2025 14:15:28 +0100 Subject: [PATCH 1/3] perf: upgrade privatemode embeddings model --- src/embedders/classification/contextual.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/embedders/classification/contextual.py b/src/embedders/classification/contextual.py index 944e90d..a61d040 100644 --- a/src/embedders/classification/contextual.py +++ b/src/embedders/classification/contextual.py @@ -217,7 +217,7 @@ class PrivatemodeAISentenceEmbedder(SentenceEmbedder): def __init__( self, batch_size: int = 128, - model_name: str = "intfloat/multilingual-e5-large-instruct", + model_name: str = "qwen3-embedding-4b", ): """ Embeds documents using privatemode ai proxy via OpenAI classes. @@ -225,7 +225,7 @@ def __init__( Args: batch_size (int, optional): Defines the number of conversions after which the embedder yields. Defaults to 128. - model_name (str, optional): Name of the embedding model from Privatemode AI (e.g. intfloat/multilingual-e5-large-instruct). Defaults to "intfloat/multilingual-e5-large-instruct". + model_name (str, optional): Name of the embedding model from Privatemode AI (e.g. qwen3-embedding-4b). Defaults to "qwen3-embedding-4b". Raises: Exception: If you use Azure, you need to provide api_type, api_version and api_base. 
From a6caf1befc92707a6fc28b09796d23dbcf97b23b Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 20 Nov 2025 08:32:03 +0100 Subject: [PATCH 2/3] perf: increase trim length for privatemode --- src/embedders/classification/contextual.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/embedders/classification/contextual.py b/src/embedders/classification/contextual.py index a61d040..e2baa7c 100644 --- a/src/embedders/classification/contextual.py +++ b/src/embedders/classification/contextual.py @@ -278,7 +278,7 @@ def dump(self, project_id: str, embedding_id: str) -> None: export_file.parent.mkdir(parents=True, exist_ok=True) util.write_json(self.to_json(), export_file, indent=2) - def _trim_length(self, text: str, max_length: int = 512) -> str: + def _trim_length(self, text: str, max_length: int = 32000) -> str: tokens = self._auto_tokenizer( text, truncation=True, From c196685276570de0f8633843ba927d7db1f2a3b2 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 20 Nov 2025 08:59:30 +0100 Subject: [PATCH 3/3] perf: update auto_tokenizer for privatemode --- src/embedders/classification/contextual.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/embedders/classification/contextual.py b/src/embedders/classification/contextual.py index e2baa7c..5108bdd 100644 --- a/src/embedders/classification/contextual.py +++ b/src/embedders/classification/contextual.py @@ -218,6 +218,7 @@ def __init__( self, batch_size: int = 128, model_name: str = "qwen3-embedding-4b", + hf_model_name: str = "boboliu/Qwen3-Embedding-4B-W4A16-G128", ): """ Embeds documents using privatemode ai proxy via OpenAI classes. 
@@ -238,8 +239,8 @@ def __init__( api_key="dummy", # Set in proxy base_url=PRIVATEMODE_AI_URL, ) - # for trimming the length of the text if > 512 tokens - self._auto_tokenizer = AutoTokenizer.from_pretrained(self.model_name) + # for trimming the length of the text if > 32000 tokens + self._auto_tokenizer = AutoTokenizer.from_pretrained(hf_model_name) def _encode( self, documents: List[Union[str, Doc]], fit_model: bool