Skip to content

Commit fe2b79e

Browse files
add prompt logging (#220)
* add prompt logging
* Update optimizer_config.schema.json
* fix

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent d61ee10 commit fe2b79e

File tree

5 files changed

+16
-14
lines changed

5 files changed

+16
-14
lines changed

autointent/_embedder.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,13 +198,15 @@ def embed(self, utterances: list[str], task_type: TaskTypeEnum | None = None) ->
198198
return np.load(embeddings_path) # type: ignore[no-any-return]
199199

200200
self._load_model()
201+
prompt = self.config.get_prompt(task_type)
201202

202203
logger.debug(
203-
"Calculating embeddings with model %s, batch_size=%d, max_seq_length=%s, embedder_device=%s",
204+
"Calculating embeddings with model %s, batch_size=%d, max_seq_length=%s, embedder_device=%s, prompt=%s",
204205
self.config.model_name,
205206
self.config.batch_size,
206207
str(self.config.tokenizer_config.max_length),
207208
self.config.device,
209+
prompt,
208210
)
209211

210212
if self.config.tokenizer_config.max_length is not None:
@@ -215,7 +217,7 @@ def embed(self, utterances: list[str], task_type: TaskTypeEnum | None = None) ->
215217
convert_to_numpy=True,
216218
batch_size=self.config.batch_size,
217219
normalize_embeddings=True,
218-
prompt=self.config.get_prompt_type(task_type),
220+
prompt=prompt,
219221
)
220222

221223
if self.config.use_cache:

autointent/configs/_transformers.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ class EmbedderConfig(HFModelConfig):
5959
default_prompt: str | None = Field(
6060
None, description="Default prompt for the model. This is used when no task specific prompt is not provided."
6161
)
62-
classifier_prompt: str | None = Field(None, description="Prompt for classifier.")
62+
classification_prompt: str | None = Field(None, description="Prompt for classifier.")
6363
cluster_prompt: str | None = Field(None, description="Prompt for clustering.")
6464
sts_prompt: str | None = Field(None, description="Prompt for finding most similar sentences.")
6565
query_prompt: str | None = Field(None, description="Prompt for query.")
@@ -79,8 +79,8 @@ def get_prompt_config(self) -> dict[str, str] | None:
7979
prompts = {}
8080
if self.default_prompt:
8181
prompts[TaskTypeEnum.default.value] = self.default_prompt
82-
if self.classifier_prompt:
83-
prompts[TaskTypeEnum.classification.value] = self.classifier_prompt
82+
if self.classification_prompt:
83+
prompts[TaskTypeEnum.classification.value] = self.classification_prompt
8484
if self.cluster_prompt:
8585
prompts[TaskTypeEnum.cluster.value] = self.cluster_prompt
8686
if self.query_prompt:
@@ -91,7 +91,7 @@ def get_prompt_config(self) -> dict[str, str] | None:
9191
prompts[TaskTypeEnum.sts.value] = self.sts_prompt
9292
return prompts if len(prompts) > 0 else None
9393

94-
def get_prompt_type(self, prompt_type: TaskTypeEnum | None) -> str | None: # noqa: PLR0911
94+
def get_prompt(self, prompt_type: TaskTypeEnum | None) -> str | None: # noqa: PLR0911
9595
"""Get the prompt type for the given task type.
9696
9797
Args:
@@ -103,7 +103,7 @@ def get_prompt_type(self, prompt_type: TaskTypeEnum | None) -> str | None: # no
103103
if prompt_type is None:
104104
return self.default_prompt
105105
if prompt_type == TaskTypeEnum.classification:
106-
return self.classifier_prompt
106+
return self.classification_prompt
107107
if prompt_type == TaskTypeEnum.cluster:
108108
return self.cluster_prompt
109109
if prompt_type == TaskTypeEnum.query:

docs/optimizer_config.schema.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@
160160
"description": "Default prompt for the model. This is used when no task specific prompt is not provided.",
161161
"title": "Default Prompt"
162162
},
163-
"classifier_prompt": {
163+
"classification_prompt": {
164164
"anyOf": [
165165
{
166166
"type": "string"
@@ -171,7 +171,7 @@
171171
],
172172
"default": null,
173173
"description": "Prompt for classifier.",
174-
"title": "Classifier Prompt"
174+
"title": "Classification Prompt"
175175
},
176176
"cluster_prompt": {
177177
"anyOf": [
@@ -459,7 +459,7 @@
459459
},
460460
"trust_remote_code": false,
461461
"default_prompt": null,
462-
"classifier_prompt": null,
462+
"classification_prompt": null,
463463
"cluster_prompt": null,
464464
"sts_prompt": null,
465465
"query_prompt": null,

docs/optimizer_search_space_config.schema.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@
404404
"description": "Default prompt for the model. This is used when no task specific prompt is not provided.",
405405
"title": "Default Prompt"
406406
},
407-
"classifier_prompt": {
407+
"classification_prompt": {
408408
"anyOf": [
409409
{
410410
"type": "string"

tests/callback/test_callback.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def test_pipeline_callbacks(dataset):
136136
"module_kwargs": {
137137
"embedder_config": {
138138
"batch_size": 32,
139-
"classifier_prompt": None,
139+
"classification_prompt": None,
140140
"cluster_prompt": None,
141141
"default_prompt": None,
142142
"device": None,
@@ -173,7 +173,7 @@ def test_pipeline_callbacks(dataset):
173173
"module_kwargs": {
174174
"embedder_config": {
175175
"batch_size": 32,
176-
"classifier_prompt": None,
176+
"classification_prompt": None,
177177
"cluster_prompt": None,
178178
"default_prompt": None,
179179
"device": None,
@@ -210,7 +210,7 @@ def test_pipeline_callbacks(dataset):
210210
"module_kwargs": {
211211
"embedder_config": {
212212
"batch_size": 32,
213-
"classifier_prompt": None,
213+
"classification_prompt": None,
214214
"cluster_prompt": None,
215215
"default_prompt": None,
216216
"device": None,

0 commit comments

Comments (0)