Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
f036cb1
Refactor imports to use langchain_community.chat_models instead of la…
NotBioWaste905 Apr 17, 2025
a134247
add test_release workflow
NotBioWaste905 Apr 17, 2025
ce99dc3
fix test_release, add other os
NotBioWaste905 Apr 17, 2025
a170ebb
fix: specify shell for test installed package step in test_release wo…
NotBioWaste905 Apr 17, 2025
3f74d7b
fix shell command placement
NotBioWaste905 Apr 17, 2025
c8ab369
refactor: consolidate installation and testing steps in test_release …
NotBioWaste905 Apr 17, 2025
7a8a698
fighting with windows venv
NotBioWaste905 Apr 17, 2025
dc4ad0b
feat: add build and publish workflow; update test_release for cross-p…
NotBioWaste905 Apr 17, 2025
f75a102
fixing macos and windows cal once more
NotBioWaste905 Apr 18, 2025
2fe059b
move model initialization from pipelines to the algorithms
NotBioWaste905 Apr 18, 2025
40c1991
fix: update project description and author information in pyproject.toml
NotBioWaste905 Apr 18, 2025
8c8d957
fix: replace remove with pop to safely handle missing api_key in mode…
NotBioWaste905 Apr 18, 2025
cf23f8c
add bunch of __init__ files for aliasing
NotBioWaste905 Apr 18, 2025
5ff4c10
remove pip upgrade
NotBioWaste905 Apr 18, 2025
d0aa7e7
Merge remote-tracking branch 'origin/dev' into v0.1.0-MVP
NotBioWaste905 Apr 18, 2025
3153fc5
lint
NotBioWaste905 Apr 18, 2025
6262c2d
trying to fix workflow issues related to windows and macos
NotBioWaste905 Apr 18, 2025
789714a
туче екн
NotBioWaste905 Apr 18, 2025
da8b8f0
Merge remote-tracking branch 'origin/dev' into v0.1.0-MVP
NotBioWaste905 Apr 18, 2025
79b54e9
fix poetry lock
NotBioWaste905 Apr 18, 2025
dd04e57
modelstorage key renaming
NotBioWaste905 Apr 21, 2025
8342d59
Update the documentation
NotBioWaste905 Apr 21, 2025
121438c
format
NotBioWaste905 Apr 21, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions .github/workflows/build_and_publish_release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
name: build_and_publish_release

on:
  workflow_dispatch:

jobs:
  deploy:
    runs-on: ubuntu-latest
    # workflow_dispatch can target any ref; gate publishing to main only.
    if: github.ref == 'refs/heads/main'

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          # Build/publish in the runner's environment; no venv needed here.
          virtualenvs-create: false

      # FIX: the original configured both `pypi-token.pypi` and
      # `http-basic.pypi __token__ <secret>` with the same value. Poetry
      # needs exactly one auth method; the API token is the recommended
      # one, so the redundant (and potentially conflicting) http-basic
      # entry is removed.
      - name: Configure Poetry
        run: |
          poetry config pypi-token.pypi ${{ secrets.PYPI_API_TOKEN }}

      - name: Build and publish
        run: |
          poetry build
          poetry publish
51 changes: 51 additions & 0 deletions .github/workflows/test_release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
name: test_release

on:
  push:
    branches: '**'
  pull_request:
    branches:
      - main
  workflow_dispatch:

# Cancel superseded runs on feature branches; let dev/main runs finish.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/dev' && github.ref != 'refs/heads/main' }}

jobs:
  test_full:
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11", "3.12"]
        os: [macOS-latest, windows-latest, ubuntu-latest]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4

      - name: set up python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: setup poetry and install dependencies
        run: |
          python -m pip install --upgrade pip poetry==1.8.4

      - name: build release
        run: |
          python -m poetry build

      - name: install and test installed package
        # bash is forced so one script works on Windows (git-bash) as well
        # as the POSIX runners.
        shell: bash
        run: |
          python -m venv test_env
          . ${GITHUB_WORKSPACE}/test_env/bin/activate || . ${GITHUB_WORKSPACE}/test_env/Scripts/activate
          pip install ./dist/*.whl
          pip install pytest
          # Debug information
          echo "Current directory: $(pwd)"
          echo "Directory contents:"
          ls -la
          # Actually run the tests with explicit path
          python -m pytest tests/ -v
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ Choose LLMs for generating and validating dialogue graph and invoke graph genera

```python
from dialogue2graph.datasets.complex_dialogues.generation import LoopedGraphGenerator
from langchain_openai import ChatOpenAI
from langchain_community.chat_models import ChatOpenAI


gen_model = ChatOpenAI(
Expand Down
3 changes: 2 additions & 1 deletion dialogue2graph/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from dialogue2graph.datasets.complex_dialogues.generation import CycleGraphGenerator
from dialogue2graph.datasets.augment_dialogues.augmentation import DialogueAugmenter

__all__ = ["CycleGraphGenerator"]
__all__ = ["CycleGraphGenerator", "DialogueAugmenter"]
5 changes: 5 additions & 0 deletions dialogue2graph/datasets/augment_dialogues/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from dialogue2graph.datasets.augment_dialogues.augmentation import DialogueAugmenter

__all__ = [
"DialogueAugmenter",
]
91 changes: 48 additions & 43 deletions dialogue2graph/datasets/augment_dialogues/augmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,31 @@
from dialogue2graph.pipelines.core.algorithms import DialogAugmentation
from dialogue2graph.pipelines.core.dialogue import Dialogue
from dialogue2graph.pipelines.model_storage import ModelStorage
from dialogue2graph.metrics.no_llm_metrics.metrics import (
is_correct_length, match_roles
)
from dialogue2graph.metrics.no_llm_metrics.metrics import is_correct_length, match_roles

logging.getLogger("langchain_core.vectorstores.base").setLevel(logging.ERROR)


class AugmentedTurn(BaseModel):
"""Dialogue turn to augment"""

participant: str
text: list[str] = Field(..., description="List of utterance variations for this turn")
text: list[str] = Field(
..., description="List of utterance variations for this turn"
)


class DialogueSequence(BaseModel):
"""Result as dialogue sequence"""

result: list[AugmentedTurn] = Field(..., description="Sequence of augmented turns")


class DialogueAugmenter(DialogAugmentation):
"""Class for dialogue augmentation.

Augments dialogues while preserving structure and conversation flow by rephrasing original dialogue lines."""

model_storage: ModelStorage = Field(..., description="Model storage instance")
generation_llm: str = Field(..., description="Key for generation LLM in storage")
formatting_llm: str = Field(..., description="Key for formatting LLM in storage")
Expand All @@ -40,33 +44,34 @@ def invoke(
topic: str = "",
) -> Union[list[Dialogue], str]:
"""Augment dialogue while preserving conversation structure.

Args:
dialogue: Input Dialogue object to augment
prompt: Required augmentation prompt template
topic: Contextual topic for augmentation (default: empty)

Returns:
List of augmented Dialogue objects or error message
"""
if prompt == '':
return 'Preprocessing failed: prompt should be a valid instruction for LLM'
if prompt == "":
return "Preprocessing failed: prompt should be a valid instruction for LLM"

try:
message_dicts = [msg.model_dump() for msg in dialogue.messages]
if message_dicts == []:
return 'Preprocessing failed: no messages found in the dialogue'
return "Preprocessing failed: no messages found in the dialogue"

augmentation_prompt = PromptTemplate.from_template(prompt)
parser = JsonOutputParser(pydantic_object=DialogueSequence)

fixed_parser = OutputFixingParser.from_llm(
parser=parser,
llm=self._get_llm(self.formatting_llm)
parser=parser, llm=self._get_llm(self.formatting_llm)
)

chain = (
augmentation_prompt | self._get_llm(self.generation_llm) | fixed_parser
)

chain = augmentation_prompt | self._get_llm(self.generation_llm) | fixed_parser

for attempt in range(3):
try:
result = chain.invoke({"topic": topic, "dialogue": message_dicts})
Expand All @@ -76,58 +81,55 @@ def invoke(
except Exception as e:
logging.error(f"Error creating dialogues: {str(e)}")
return f"Post-processing failed: {str(e)}"

except ValidationError as ve:
logging.warning(f"Validation error attempt {attempt+1}: {ve}")
logging.warning(f"Validation error attempt {attempt + 1}: {ve}")

except Exception as e:
logging.error(f"Unexpected error: {str(e)}")
if attempt == 2:
return f"Augmentation failed: {str(e)}"

return "Augmentation failed after 3 attempts"

except Exception as e:
logging.exception("Critical error in augmentation pipeline")
return f"Critical error: {str(e)}"

async def ainvoke(self, *args, **kwargs):
"""Async version of invoke"""
return self.invoke(*args, **kwargs)

async def evaluate(
self,
dialogue: Dialogue,
prompt: str,
topic: str = ""
) -> dict:

async def evaluate(self, dialogue: Dialogue, prompt: str, topic: str = "") -> dict:
"""Evaluate augmentation quality with dictionary report format."""
result = self.invoke(dialogue, prompt, topic)

if isinstance(result, str):
return {"error": result}
report = {}

report = {}
for i, augmented_dialogue in enumerate(result):
try:
report[f'augmented_dialogue_{i}'] = {
try:
report[f"augmented_dialogue_{i}"] = {
"match_roles": match_roles(dialogue, augmented_dialogue),
"correct_length": is_correct_length(dialogue, augmented_dialogue)
"correct_length": is_correct_length(dialogue, augmented_dialogue),
}
except Exception as e:
logging.error(f"Error while calculating metrics: {str(e)}")
logging.error(f"Error while calculating metrics: {str(e)}")
return report

def _get_llm(self, llm_key: str):
"""Get model from model storage safely"""
if llm_key not in self.model_storage.storage:
raise ValueError(f"LLM key '{llm_key}' not found in model storage")
return self.model_storage.storage[llm_key].model

def _combine_one_dialogue(self, augmentation_result: DialogueSequence, i: int) -> dict:

def _combine_one_dialogue(
self, augmentation_result: DialogueSequence, i: int
) -> dict:
"""Combine new augmented dialogues from utterance variations"""
new_augmented_dialogue = {}
new_augmented_dialogue['messages'] = []
new_augmented_dialogue["messages"] = []
roles_to_add = [turn.participant for turn in augmentation_result.result]
utterances_to_add = [turn.text[i] for turn in augmentation_result.result]

Expand All @@ -139,13 +141,13 @@ def _combine_one_dialogue(self, augmentation_result: DialogueSequence, i: int) -

return new_augmented_dialogue

def _create_dialogues(self, result: dict) -> list[Dialogue]:
def _create_dialogues(self, result: dict) -> list[Dialogue]:
"""Create a list of Dialogue objects"""
try:
augmentation_result = DialogueSequence(result=result)
except Exception as e:
logging.error(f"Wrong type of augmentation result: {str(e)}")
return f"Creating a list of Dialogue objects failed: {str(e)}"
return f"Creating a list of Dialogue objects failed: {str(e)}"

utterances_lists = [turn.text for turn in augmentation_result.result]
lens = [len(uttr_list) for uttr_list in utterances_lists]
Expand All @@ -154,5 +156,8 @@ def _create_dialogues(self, result: dict) -> list[Dialogue]:
for i in range(min(lens)):
new_augmented_dialogue = self._combine_one_dialogue(augmentation_result, i)
augmented_dialogues.append(new_augmented_dialogue)

return [Dialogue.from_list(new_augmented_dialogue['messages']) for new_augmented_dialogue in augmented_dialogues]

return [
Dialogue.from_list(new_augmented_dialogue["messages"])
for new_augmented_dialogue in augmented_dialogues
]
57 changes: 53 additions & 4 deletions dialogue2graph/datasets/complex_dialogues/generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""

import logging
import os
from enum import Enum
from typing import Optional, Dict, Any, Union

Expand Down Expand Up @@ -62,6 +63,7 @@ class GenerationError(BaseModel):

class CycleGraphGenerator(BaseModel):
"""Class for generating graph with cycles"""

cache: Optional[Any] = Field(default=None, exclude=True)

class Config:
Expand Down Expand Up @@ -99,6 +101,7 @@ def evaluate(self, *args, report_type="dict", **kwargs):

class GenerationPipeline(BaseModel):
"""Class for generation pipeline"""

cache: Optional[Any] = Field(default=None, exclude=True)
generation_model: BaseChatModel
theme_validation_model: BaseChatModel
Expand Down Expand Up @@ -392,10 +395,20 @@ class LoopedGraphGenerator(TopicGraphGenerator):
"""Graph generator for topic-based dialogue generation with model storage support"""

model_storage: ModelStorage = Field(description="Model storage")
generation_llm: str = Field(description="LLM for graph generation")
validation_llm: str = Field(description="LLM for validation")
cycle_ends_llm: str = Field(description="LLM for dialog sampler to find cycle ends")
theme_validation_llm: str = Field(description="LLM for theme validation")
generation_llm: str = Field(
description="LLM for graph generation", default="looped_graph_generation_llm:v1"
)
validation_llm: str = Field(
description="LLM for validation", default="looped_graph_validation_llm:v1"
)
cycle_ends_llm: str = Field(
description="LLM for dialog sampler to find cycle ends",
default="looped_graph_cycle_ends_llm:v1",
)
theme_validation_llm: str = Field(
description="LLM for theme validation",
default="looped_graph_theme_validation_llm:v1",
)
pipeline: GenerationPipeline

def __init__(
Expand All @@ -406,6 +419,42 @@ def __init__(
cycle_ends_llm: str,
theme_validation_llm: str,
):
# check if models are in model storage
# if model is not in model storage put the default model there
if generation_llm not in model_storage.storage:
model_storage.add(
key=generation_llm,
config={
"name": "gpt-4o-latest",
"api_key": os.getenv("OPENAI_API_KEY"),
"base_url": os.getenv("OPENAI_BASE_URL"),
},
model_type="llm",
)

if validation_llm not in model_storage.storage:
model_storage.add(
key=validation_llm,
config={
"name": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
"base_url": os.getenv("OPENAI_BASE_URL"),
"temperature": 0,
},
model_type="llm",
)

if theme_validation_llm not in model_storage.storage:
model_storage.add(
key=theme_validation_llm,
config={
"name": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
"base_url": os.getenv("OPENAI_BASE_URL"),
"temperature": 0,
},
model_type="llm",
)
super().__init__(
model_storage=model_storage,
generation_llm=generation_llm,
Expand Down
3 changes: 1 addition & 2 deletions dialogue2graph/pipelines/core/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@


class BasePipeline(BaseModel):
# TODO: add docs
"""Abstract class for base pipeline"""
"""Base class for pipelines"""

name: str = Field(description="Name of the pipeline")
steps: list[
Expand Down
Loading