deeppavlov
diff --git a/‎dialogue2graph/datasets/augment_dialogues/__init__.py‎ b/‎dialogue2graph/datasets/augment_dialogues/__init__.py‎
diff --git a/‎dialogue2graph/datasets/augment_dialogues/augmentation.py‎
Lines changed: 154 additions & 0 deletions b/‎dialogue2graph/datasets/augment_dialogues/augmentation.py‎
Lines changed: 154 additions & 0 deletions
diff --git a/‎dialogue2graph/datasets/augment_dialogues/prompts.py‎
Lines changed: 93 additions & 0 deletions b/‎dialogue2graph/datasets/augment_dialogues/prompts.py‎
Lines changed: 93 additions & 0 deletions
diff --git a/‎dialogue2graph/pipelines/core/algorithms.py‎
Lines changed: 3 additions & 3 deletions b/‎dialogue2graph/pipelines/core/algorithms.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎dialogue2graph/pipelines/model_storage.py‎
Lines changed: 1 addition & 1 deletion b/‎dialogue2graph/pipelines/model_storage.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎experiments/exp2025_04_09_augment_metrics_and_class/README.md‎ b/‎experiments/exp2025_04_09_augment_metrics_and_class/README.md‎
@@ -0,0 +1,154 @@
+import logging
+from typing import Union
+from pydantic import BaseModel, Field, ValidationError
+from langchain.prompts import PromptTemplate
+from langchain_core.output_parsers import JsonOutputParser
+from langchain.output_parsers import OutputFixingParser
+
+from dialogue2graph.pipelines.core.algorithms import DialogAugmentation
+from dialogue2graph.pipelines.core.dialogue import Dialogue
+from dialogue2graph.pipelines.model_storage import ModelStorage
+from dialogue2graph.metrics.no_llm_metrics.metrics import (
+    is_correct_length, match_roles
+    )
+
+# Raise the threshold of this langchain logger so that only ERROR-level (and
+# more severe) records from it are emitted.
+logging.getLogger("langchain_core.vectorstores.base").setLevel(logging.ERROR)
+
+class AugmentedTurn(BaseModel):
+    """One dialogue turn together with its alternative phrasings.
+
+    ``participant`` names the speaker of the turn and ``text`` holds the
+    utterance variations produced for that turn.
+    """
+
+    participant: str
+    text: list[str] = Field(
+        ...,
+        description="List of utterance variations for this turn",
+    )
+
+class DialogueSequence(BaseModel):
+    """Ordered collection of augmented turns making up one dialogue."""
+
+    result: list[AugmentedTurn] = Field(
+        ...,
+        description="Sequence of augmented turns",
+    )
+
+
+class DialogueAugmenter(DialogAugmentation):
+    """Augment a dialogue by asking an LLM to rephrase every utterance.
+
+    The generation LLM is prompted to return, for each turn, the participant
+    and a list of alternative phrasings. Variation ``i`` of every turn is then
+    combined into the ``i``-th augmented dialogue, so each produced dialogue
+    keeps the turn order and participants present in the LLM output.
+
+    Attributes:
+        model_storage: Storage holding the models, looked up by key.
+        generation_llm: Storage key of the LLM that generates the variations.
+        formatting_llm: Storage key of the LLM handed to ``OutputFixingParser``
+            for repairing output the JSON parser could not parse.
+    """
+
+    model_storage: ModelStorage = Field(..., description="Model storage instance")
+    generation_llm: str = Field(..., description="Key for generation LLM in storage")
+    formatting_llm: str = Field(..., description="Key for formatting LLM in storage")
+
+    def invoke(
+        self,
+        dialogue: Dialogue,
+        prompt: str,
+        topic: str = "",
+    ) -> Union[list[Dialogue], str]:
+        """Augment ``dialogue`` while preserving the conversation structure.
+
+        The LLM chain is tried up to three times. An attempt is repeated when
+        the chain call raises, or when the returned payload does not validate
+        against ``DialogueSequence``. Any other post-processing error aborts
+        immediately.
+
+        Args:
+            dialogue: Input Dialogue object to augment.
+            prompt: Augmentation prompt template; it is rendered with the
+                ``topic`` and ``dialogue`` variables.
+            topic: Contextual topic for augmentation (default: empty).
+
+        Returns:
+            List of augmented Dialogue objects on success, otherwise a string
+            describing the failure. No exception is propagated to the caller.
+        """
+        if not prompt:
+            return 'Preprocessing failed: prompt should be a valid instruction for LLM'
+
+        try:
+            message_dicts = [msg.model_dump() for msg in dialogue.messages]
+            if not message_dicts:
+                return 'Preprocessing failed: no messages found in the dialogue'
+
+            augmentation_prompt = PromptTemplate.from_template(prompt)
+            fixed_parser = OutputFixingParser.from_llm(
+                parser=JsonOutputParser(pydantic_object=DialogueSequence),
+                llm=self._get_llm(self.formatting_llm),
+            )
+            chain = augmentation_prompt | self._get_llm(self.generation_llm) | fixed_parser
+
+            max_attempts = 3
+            for attempt in range(1, max_attempts + 1):
+                try:
+                    result = chain.invoke({"topic": topic, "dialogue": message_dicts})
+                except ValidationError as ve:
+                    logging.warning("Validation error attempt %d: %s", attempt, ve)
+                    continue
+                except Exception as e:
+                    logging.error("Unexpected error: %s", e)
+                    if attempt == max_attempts:
+                        return f"Augmentation failed: {e}"
+                    continue
+
+                try:
+                    return self._create_dialogues(result)
+                except ValidationError as ve:
+                    # The LLM answered with a structure that does not match
+                    # DialogueSequence: ask again instead of giving up.
+                    logging.warning("Validation error attempt %d: %s", attempt, ve)
+                except Exception as e:
+                    logging.error("Error creating dialogues: %s", e)
+                    return f"Post-processing failed: {e}"
+
+            return "Augmentation failed after 3 attempts"
+
+        except Exception as e:
+            logging.exception("Critical error in augmentation pipeline")
+            return f"Critical error: {str(e)}"
+
+    async def ainvoke(self, *args, **kwargs):
+        """Async version of invoke.
+
+        ``invoke`` performs blocking LLM calls, so it is executed in a worker
+        thread to keep the event loop responsive. Arguments and return value
+        are those of ``invoke``.
+        """
+        import asyncio
+
+        return await asyncio.to_thread(self.invoke, *args, **kwargs)
+
+    async def evaluate(
+        self,
+        dialogue: Dialogue,
+        prompt: str,
+        topic: str = ""
+    ) -> dict:
+        """Augment ``dialogue`` and report quality metrics per augmented dialogue.
+
+        Args:
+            dialogue: Input Dialogue object to augment.
+            prompt: Augmentation prompt template passed on to ``invoke``.
+            topic: Contextual topic for augmentation (default: empty).
+
+        Returns:
+            ``{"error": message}`` when augmentation failed. Otherwise a dict
+            keyed ``augmented_dialogue_<i>`` whose values hold the
+            ``match_roles`` and ``correct_length`` metric results. A dialogue
+            whose metrics raise is logged and left out of the report.
+        """
+        # Go through ainvoke so the blocking LLM calls stay off the event loop.
+        result = await self.ainvoke(dialogue, prompt, topic)
+
+        if isinstance(result, str):
+            return {"error": result}
+
+        report = {}
+        for i, augmented_dialogue in enumerate(result):
+            try:
+                report[f'augmented_dialogue_{i}'] = {
+                    "match_roles": match_roles(dialogue, augmented_dialogue),
+                    "correct_length": is_correct_length(dialogue, augmented_dialogue)
+                }
+            except Exception as e:
+                logging.error("Error while calculating metrics: %s", e)
+        return report
+
+    def _get_llm(self, llm_key: str):
+        """Return the model stored under ``llm_key``.
+
+        Raises:
+            ValueError: If ``llm_key`` is not present in the model storage.
+        """
+        if llm_key not in self.model_storage.storage:
+            raise ValueError(f"LLM key '{llm_key}' not found in model storage")
+        return self.model_storage.storage[llm_key].model
+
+    def _combine_one_dialogue(self, augmentation_result: DialogueSequence, i: int) -> dict:
+        """Build one dialogue dict from the ``i``-th variation of every turn.
+
+        Returns:
+            ``{"messages": [{"participant": ..., "text": ...}, ...]}`` with
+            one message per turn, in the order of ``augmentation_result``.
+
+        Raises:
+            IndexError: If some turn has fewer than ``i + 1`` variations.
+        """
+        return {
+            "messages": [
+                {"participant": turn.participant, "text": turn.text[i]}
+                for turn in augmentation_result.result
+            ]
+        }
+
+    def _create_dialogues(self, result: list[dict]) -> list[Dialogue]:
+        """Turn the parsed LLM output into a list of Dialogue objects.
+
+        The number of dialogues equals the smallest number of variations
+        found among the turns, so every produced dialogue is complete.
+
+        Args:
+            result: Parsed LLM output, one mapping per turn with the
+                ``participant`` and ``text`` (list of variations) entries.
+
+        Raises:
+            ValidationError: If ``result`` does not match ``DialogueSequence``.
+            ValueError: If ``result`` contains no turns.
+        """
+        # Validation errors are deliberately not swallowed here: the caller
+        # uses them as the signal to retry the LLM call.
+        augmentation_result = DialogueSequence(result=result)
+
+        variation_counts = [len(turn.text) for turn in augmentation_result.result]
+        if not variation_counts:
+            raise ValueError("Augmentation result contains no turns")
+
+        return [
+            Dialogue.from_list(
+                self._combine_one_dialogue(augmentation_result, i)["messages"]
+            )
+            for i in range(min(variation_counts))
+        ]
@@ -0,0 +1,93 @@
+# Prompt asking for 2-5 variations per utterance. It contains the `{topic}`
+# and `{dialogue}` placeholders; literal JSON braces are doubled so that they
+# survive format-style templating.
+augmentation_prompt_from2to5_vars = """
+You are tasked with augmenting a dialogue by adding variations to existing utterances while maintaining the original dialogue flow and intent.
+
+INSTRUCTION:
+1. For each message in the dialogue:
+   - Create 2-5 variations of the 'text' field that:
+     * Express the same meaning/intent
+     * Use different wording and phrasing
+     * Match the given theme
+     * Sound natural and conversational
+
+2. Ensure all utterance variations:
+   - Do not repeat each other word for word
+   - Are appropriate for the theme
+   - Maintain consistency in tone and style
+   - Make sense in the conversation flow
+
+3. Make sure that all the utterances in the dialogue are different from each other.
+
+4. The output must be a list of dictionaries, where each dictionary has:
+   - 'participant': either 'user' or 'assistant'
+   - 'text': a list of 2-5 utterance variations (strings)
+
+Below are EXAMPLES of original phrases and their corresponding augmented phrases.
+
+**EXAMPLE 1**
+ORIGINAL PHRASE: "I've checked and the camera is not blocked"
+AUGMENTED PHRASES: ["I've ensured that there's nothing in front of the camera", "I've made sure the camera is clear of any obstructions."]
+
+**EXAMPLE 2**
+ORIGINAL PHRASE: 'Alright, if you need any further assistance, feel free to reach out. Have a great day!'
+AUGMENTED PHRASES: ["Okay, if you ever need more help, don't hesitate to ask. Have a wonderful day!", "No problem! If you need any more help later on, don't hesitate to get in touch. Have a wonderful day!"]
+
+**EXAMPLE 3**
+ORIGINAL PHRASE: "I'm curious about the pricing for eco-friendly packaging."
+AUGMENTED PHRASES: ['Can you tell me about the expenses associated with eco-friendly packaging?', 'I want to know about the costs of eco-friendly packaging.']
+
+Now you will be provided with INPUT THEME and INPUT DIALOGUE. Return ONLY a valid JSON array containing the augmented dialogue messages. Each message should be in this exact format:
+For assistant messages: {{"participant": "assistant", "text": [list of utterance variations]}}
+For user messages: {{"participant": "user", "text": [list of utterance variations]}}
+
+INPUT THEME: {topic}
+
+INPUT DIALOGUE:
+{dialogue}
+"""
+
+# Prompt asking for exactly 3 variations per utterance. It contains the
+# `{topic}` and `{dialogue}` placeholders; literal JSON braces are doubled so
+# that they survive format-style templating.
+# NOTE(review): the examples below show two variations each although three
+# are requested — consider extending them.
+augmentation_prompt_3_vars = """
+You are tasked with augmenting a dialogue by adding variations to existing utterances while maintaining the original dialogue flow and intent.
+
+INSTRUCTION:
+1. For each message in the dialogue:
+   - Create 3 variations of the 'text' field that:
+     * Express the same meaning/intent
+     * Use different wording and phrasing
+     * Match the given theme
+     * Sound natural and conversational
+
+2. Ensure all utterance variations:
+   - Do not repeat each other word for word
+   - Are appropriate for the theme
+   - Maintain consistency in tone and style
+   - Make sense in the conversation flow
+
+3. Make sure that all the utterances in the dialogue are different from each other.
+
+4. The output must be a list of dictionaries, where each dictionary has:
+   - 'participant': either 'user' or 'assistant'
+   - 'text': a list of 3 utterance variations (strings)
+
+Below are EXAMPLES of original phrases and their corresponding augmented phrases.
+
+**EXAMPLE 1**
+ORIGINAL PHRASE: "I've checked and the camera is not blocked"
+AUGMENTED PHRASES: ["I've ensured that there's nothing in front of the camera", "I've made sure the camera is clear of any obstructions."]
+
+**EXAMPLE 2**
+ORIGINAL PHRASE: 'Alright, if you need any further assistance, feel free to reach out. Have a great day!'
+AUGMENTED PHRASES: ["Okay, if you ever need more help, don't hesitate to ask. Have a wonderful day!", "No problem! If you need any more help later on, don't hesitate to get in touch. Have a wonderful day!"]
+
+**EXAMPLE 3**
+ORIGINAL PHRASE: "I'm curious about the pricing for eco-friendly packaging."
+AUGMENTED PHRASES: ['Can you tell me about the expenses associated with eco-friendly packaging?', 'I want to know about the costs of eco-friendly packaging.']
+
+Now you will be provided with INPUT THEME and INPUT DIALOGUE. Return ONLY a valid JSON array containing the augmented dialogue messages. Each message should be in this exact format:
+For assistant messages: {{"participant": "assistant", "text": [list of utterance variations]}}
+For user messages: {{"participant": "user", "text": [list of utterance variations]}}
+
+INPUT THEME: {topic}
+
+INPUT DIALOGUE:
+{dialogue}
+"""
@@ -61,12 +61,12 @@ class DialogAugmentation(BaseAlgorithm):
     :param topic: The topic to guide the augmentation process (optional).
     """
 
-    def __init__(self) -> None:
-        super().__init__()
-
     def invoke(self, dialogue: Dialogue, topic: str = "") -> Dialogue:
         # Not implemented at this level: calling it always raises
         # NotImplementedError.
         raise NotImplementedError
 
+    async def ainvoke(self, dialogue: Dialogue, topic: str = "") -> Dialogue:
+        # Async entry point with the same parameters as invoke; not
+        # implemented at this level, so awaiting it raises NotImplementedError.
+        raise NotImplementedError
+
 
 class GraphAugmentation(BaseAlgorithm):
     """Graph generator that works only with topics."""
 
@@ -5,7 +5,7 @@
 from pathlib import Path
 from pydantic import BaseModel, Field, model_validator
 
-from langchain_openai import ChatOpenAI
+from langchain_community.chat_models import ChatOpenAI
 from langchain_core.language_models import BaseChatModel
 from langchain_huggingface import HuggingFaceEmbeddings