Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
f036cb1
Refactor imports to use langchain_community.chat_models instead of la…
NotBioWaste905 Apr 17, 2025
a134247
add test_release workflow
NotBioWaste905 Apr 17, 2025
ce99dc3
fix test_release, add other os
NotBioWaste905 Apr 17, 2025
a170ebb
fix: specify shell for test installed package step in test_release wo…
NotBioWaste905 Apr 17, 2025
3f74d7b
fix shell command placement
NotBioWaste905 Apr 17, 2025
c8ab369
refactor: consolidate installation and testing steps in test_release …
NotBioWaste905 Apr 17, 2025
7a8a698
fighting with windows venv
NotBioWaste905 Apr 17, 2025
dc4ad0b
feat: add build and publish workflow; update test_release for cross-p…
NotBioWaste905 Apr 17, 2025
f75a102
fixing macos and windows cal once more
NotBioWaste905 Apr 18, 2025
2fe059b
move model initialization from pipelines to the algorithms
NotBioWaste905 Apr 18, 2025
40c1991
fix: update project description and author information in pyproject.toml
NotBioWaste905 Apr 18, 2025
8c8d957
fix: replace remove with pop to safely handle missing api_key in mode…
NotBioWaste905 Apr 18, 2025
cf23f8c
add bunch of __init__ files for aliasing
NotBioWaste905 Apr 18, 2025
5ff4c10
remove pip upgrade
NotBioWaste905 Apr 18, 2025
d0aa7e7
Merge remote-tracking branch 'origin/dev' into v0.1.0-MVP
NotBioWaste905 Apr 18, 2025
3153fc5
lint
NotBioWaste905 Apr 18, 2025
6262c2d
trying to fix workflow issues related to windows and macos
NotBioWaste905 Apr 18, 2025
789714a
туче екн
NotBioWaste905 Apr 18, 2025
da8b8f0
Merge remote-tracking branch 'origin/dev' into v0.1.0-MVP
NotBioWaste905 Apr 18, 2025
79b54e9
fix poetry lock
NotBioWaste905 Apr 18, 2025
dd04e57
modelstorage key renaming
NotBioWaste905 Apr 21, 2025
8342d59
Update the documentation
NotBioWaste905 Apr 21, 2025
121438c
format
NotBioWaste905 Apr 21, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions .github/workflows/build_and_publish_release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
name: build_and_publish_release

on:
  workflow_dispatch:

jobs:
  deploy:
    runs-on: ubuntu-latest
    # workflow_dispatch can target any ref; gate publishing to main only.
    if: github.ref == 'refs/heads/main'

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          # Build/publish in the runner's environment; no venv needed here.
          virtualenvs-create: false

      # FIX: the original configured both `pypi-token.pypi` and
      # `http-basic.pypi __token__ <secret>` with the same value. Poetry
      # needs exactly one auth method; the API token is the recommended
      # one, so the redundant (and potentially conflicting) http-basic
      # entry is removed.
      - name: Configure Poetry
        run: |
          poetry config pypi-token.pypi ${{ secrets.PYPI_API_TOKEN }}

      - name: Build and publish
        run: |
          poetry build
          poetry publish
51 changes: 51 additions & 0 deletions .github/workflows/test_release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
name: test_release

on:
  push:
    branches: '**'
  pull_request:
    branches:
      - main
  workflow_dispatch:

# Cancel superseded runs on feature branches; let dev/main runs finish.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/dev' && github.ref != 'refs/heads/main' }}

jobs:
  test_full:
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11", "3.12"]
        os: [macOS-latest, windows-latest, ubuntu-latest]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4

      - name: set up python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: setup poetry and install dependencies
        run: |
          python -m pip install --upgrade pip poetry==1.8.4

      - name: build release
        run: |
          python -m poetry build

      - name: install and test installed package
        # bash is forced so one script works on Windows (git-bash) as well
        # as the POSIX runners.
        shell: bash
        run: |
          python -m venv test_env
          . ${GITHUB_WORKSPACE}/test_env/bin/activate || . ${GITHUB_WORKSPACE}/test_env/Scripts/activate
          pip install ./dist/*.whl
          pip install pytest
          # Debug information
          echo "Current directory: $(pwd)"
          echo "Directory contents:"
          ls -la
          # Actually run the tests with explicit path
          python -m pytest tests/ -v
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ Choose LLMs for generating and validating dialogue graph and invoke graph genera

```python
from dialogue2graph.datasets.complex_dialogues.generation import LoopedGraphGenerator
from langchain_openai import ChatOpenAI
from langchain_community.chat_models import ChatOpenAI


gen_model = ChatOpenAI(
Expand Down
3 changes: 2 additions & 1 deletion dialogue2graph/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from dialogue2graph.datasets.complex_dialogues.generation import CycleGraphGenerator
from dialogue2graph.datasets.augment_dialogues.augmentation import DialogueAugmenter

__all__ = ["CycleGraphGenerator"]
__all__ = ["CycleGraphGenerator", "DialogueAugmenter"]
5 changes: 5 additions & 0 deletions dialogue2graph/datasets/augment_dialogues/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from dialogue2graph.datasets.augment_dialogues.augmentation import DialogueAugmenter

__all__ = [
"DialogueAugmenter",
]
91 changes: 48 additions & 43 deletions dialogue2graph/datasets/augment_dialogues/augmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,31 @@
from dialogue2graph.pipelines.core.algorithms import DialogAugmentation
from dialogue2graph.pipelines.core.dialogue import Dialogue
from dialogue2graph.pipelines.model_storage import ModelStorage
from dialogue2graph.metrics.no_llm_metrics.metrics import (
is_correct_length, match_roles
)
from dialogue2graph.metrics.no_llm_metrics.metrics import is_correct_length, match_roles

logging.getLogger("langchain_core.vectorstores.base").setLevel(logging.ERROR)


class AugmentedTurn(BaseModel):
"""Dialogue turn to augment"""

participant: str
text: list[str] = Field(..., description="List of utterance variations for this turn")
text: list[str] = Field(
..., description="List of utterance variations for this turn"
)


class DialogueSequence(BaseModel):
"""Result as dialogue sequence"""

result: list[AugmentedTurn] = Field(..., description="Sequence of augmented turns")


class DialogueAugmenter(DialogAugmentation):
"""Class for dialogue augmentation.

Augments dialogues while preserving structure and conversation flow by rephrasing original dialogue lines."""

model_storage: ModelStorage = Field(..., description="Model storage instance")
generation_llm: str = Field(..., description="Key for generation LLM in storage")
formatting_llm: str = Field(..., description="Key for formatting LLM in storage")
Expand All @@ -40,33 +44,34 @@ def invoke(
topic: str = "",
) -> Union[list[Dialogue], str]:
"""Augment dialogue while preserving conversation structure.

Args:
dialogue: Input Dialogue object to augment
prompt: Required augmentation prompt template
topic: Contextual topic for augmentation (default: empty)

Returns:
List of augmented Dialogue objects or error message
"""
if prompt == '':
return 'Preprocessing failed: prompt should be a valid instruction for LLM'
if prompt == "":
return "Preprocessing failed: prompt should be a valid instruction for LLM"

try:
message_dicts = [msg.model_dump() for msg in dialogue.messages]
if message_dicts == []:
return 'Preprocessing failed: no messages found in the dialogue'
return "Preprocessing failed: no messages found in the dialogue"

augmentation_prompt = PromptTemplate.from_template(prompt)
parser = JsonOutputParser(pydantic_object=DialogueSequence)

fixed_parser = OutputFixingParser.from_llm(
parser=parser,
llm=self._get_llm(self.formatting_llm)
parser=parser, llm=self._get_llm(self.formatting_llm)
)

chain = (
augmentation_prompt | self._get_llm(self.generation_llm) | fixed_parser
)

chain = augmentation_prompt | self._get_llm(self.generation_llm) | fixed_parser

for attempt in range(3):
try:
result = chain.invoke({"topic": topic, "dialogue": message_dicts})
Expand All @@ -76,58 +81,55 @@ def invoke(
except Exception as e:
logging.error(f"Error creating dialogues: {str(e)}")
return f"Post-processing failed: {str(e)}"

except ValidationError as ve:
logging.warning(f"Validation error attempt {attempt+1}: {ve}")
logging.warning(f"Validation error attempt {attempt + 1}: {ve}")

except Exception as e:
logging.error(f"Unexpected error: {str(e)}")
if attempt == 2:
return f"Augmentation failed: {str(e)}"

return "Augmentation failed after 3 attempts"

except Exception as e:
logging.exception("Critical error in augmentation pipeline")
return f"Critical error: {str(e)}"

async def ainvoke(self, *args, **kwargs):
"""Async version of invoke"""
return self.invoke(*args, **kwargs)

async def evaluate(
self,
dialogue: Dialogue,
prompt: str,
topic: str = ""
) -> dict:

async def evaluate(self, dialogue: Dialogue, prompt: str, topic: str = "") -> dict:
"""Evaluate augmentation quality with dictionary report format."""
result = self.invoke(dialogue, prompt, topic)

if isinstance(result, str):
return {"error": result}
report = {}

report = {}
for i, augmented_dialogue in enumerate(result):
try:
report[f'augmented_dialogue_{i}'] = {
try:
report[f"augmented_dialogue_{i}"] = {
"match_roles": match_roles(dialogue, augmented_dialogue),
"correct_length": is_correct_length(dialogue, augmented_dialogue)
"correct_length": is_correct_length(dialogue, augmented_dialogue),
}
except Exception as e:
logging.error(f"Error while calculating metrics: {str(e)}")
logging.error(f"Error while calculating metrics: {str(e)}")
return report

def _get_llm(self, llm_key: str):
"""Get model from model storage safely"""
if llm_key not in self.model_storage.storage:
raise ValueError(f"LLM key '{llm_key}' not found in model storage")
return self.model_storage.storage[llm_key].model

def _combine_one_dialogue(self, augmentation_result: DialogueSequence, i: int) -> dict:

def _combine_one_dialogue(
self, augmentation_result: DialogueSequence, i: int
) -> dict:
"""Combine new augmented dialogues from utterance variations"""
new_augmented_dialogue = {}
new_augmented_dialogue['messages'] = []
new_augmented_dialogue["messages"] = []
roles_to_add = [turn.participant for turn in augmentation_result.result]
utterances_to_add = [turn.text[i] for turn in augmentation_result.result]

Expand All @@ -139,13 +141,13 @@ def _combine_one_dialogue(self, augmentation_result: DialogueSequence, i: int) -

return new_augmented_dialogue

def _create_dialogues(self, result: dict) -> list[Dialogue]:
def _create_dialogues(self, result: dict) -> list[Dialogue]:
"""Create a list of Dialogue objects"""
try:
augmentation_result = DialogueSequence(result=result)
except Exception as e:
logging.error(f"Wrong type of augmentation result: {str(e)}")
return f"Creating a list of Dialogue objects failed: {str(e)}"
return f"Creating a list of Dialogue objects failed: {str(e)}"

utterances_lists = [turn.text for turn in augmentation_result.result]
lens = [len(uttr_list) for uttr_list in utterances_lists]
Expand All @@ -154,5 +156,8 @@ def _create_dialogues(self, result: dict) -> list[Dialogue]:
for i in range(min(lens)):
new_augmented_dialogue = self._combine_one_dialogue(augmentation_result, i)
augmented_dialogues.append(new_augmented_dialogue)

return [Dialogue.from_list(new_augmented_dialogue['messages']) for new_augmented_dialogue in augmented_dialogues]

return [
Dialogue.from_list(new_augmented_dialogue["messages"])
for new_augmented_dialogue in augmented_dialogues
]
57 changes: 53 additions & 4 deletions dialogue2graph/datasets/complex_dialogues/generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""

import logging
import os
from enum import Enum
from typing import Optional, Dict, Any, Union

Expand Down Expand Up @@ -62,6 +63,7 @@ class GenerationError(BaseModel):

class CycleGraphGenerator(BaseModel):
"""Class for generating graph with cycles"""

cache: Optional[Any] = Field(default=None, exclude=True)

class Config:
Expand Down Expand Up @@ -99,6 +101,7 @@ def evaluate(self, *args, report_type="dict", **kwargs):

class GenerationPipeline(BaseModel):
"""Class for generation pipeline"""

cache: Optional[Any] = Field(default=None, exclude=True)
generation_model: BaseChatModel
theme_validation_model: BaseChatModel
Expand Down Expand Up @@ -392,10 +395,20 @@ class LoopedGraphGenerator(TopicGraphGenerator):
"""Graph generator for topic-based dialogue generation with model storage support"""

model_storage: ModelStorage = Field(description="Model storage")
generation_llm: str = Field(description="LLM for graph generation")
validation_llm: str = Field(description="LLM for validation")
cycle_ends_llm: str = Field(description="LLM for dialog sampler to find cycle ends")
theme_validation_llm: str = Field(description="LLM for theme validation")
generation_llm: str = Field(
description="LLM for graph generation", default="looped_graph_generation_llm:v1"
)
validation_llm: str = Field(
description="LLM for validation", default="looped_graph_validation_llm:v1"
)
cycle_ends_llm: str = Field(
description="LLM for dialog sampler to find cycle ends",
default="looped_graph_cycle_ends_llm:v1",
)
theme_validation_llm: str = Field(
description="LLM for theme validation",
default="looped_graph_theme_validation_llm:v1",
)
pipeline: GenerationPipeline

def __init__(
Expand All @@ -406,6 +419,42 @@ def __init__(
cycle_ends_llm: str,
theme_validation_llm: str,
):
# check if models are in model storage
# if model is not in model storage put the default model there
if generation_llm not in model_storage.storage:
model_storage.add(
key=generation_llm,
config={
"name": "gpt-4o-latest",
"api_key": os.getenv("OPENAI_API_KEY"),
"base_url": os.getenv("OPENAI_BASE_URL"),
},
model_type="llm",
)

if validation_llm not in model_storage.storage:
model_storage.add(
key=validation_llm,
config={
"name": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
"base_url": os.getenv("OPENAI_BASE_URL"),
"temperature": 0,
},
model_type="llm",
)

if theme_validation_llm not in model_storage.storage:
model_storage.add(
key=theme_validation_llm,
config={
"name": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
"base_url": os.getenv("OPENAI_BASE_URL"),
"temperature": 0,
},
model_type="llm",
)
super().__init__(
model_storage=model_storage,
generation_llm=generation_llm,
Expand Down
3 changes: 1 addition & 2 deletions dialogue2graph/pipelines/core/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@


class BasePipeline(BaseModel):
# TODO: add docs
"""Abstract class for base pipeline"""
"""Base class for pipelines"""

name: str = Field(description="Name of the pipeline")
steps: list[
Expand Down
Loading