Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion dialogue2graph/datasets/complex_dialogues/generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from dialogue2graph.pipelines.core.dialogue_sampling import RecursiveDialogueSampler
from dialogue2graph.metrics.no_llm_metrics import match_triplets_dg
from dialogue2graph.metrics.llm_metrics import are_triplets_valid, is_theme_valid
from dialogue2graph.metrics.no_llm_validators import is_greeting_repeated_regex, is_dialog_closed_too_early_regex
from dialogue2graph.pipelines.core.graph import BaseGraph, Graph
from dialogue2graph.pipelines.core.algorithms import TopicGraphGenerator
from dialogue2graph.pipelines.core.schemas import GraphGenerationResult, DialogueGraph
Expand Down Expand Up @@ -145,7 +146,13 @@ def validate_graph_cycle_requirement(
for i, cycle in enumerate(cycles, 1):
logger.info(f"Cycle {i}: {' -> '.join(map(str, cycle + [cycle[0]]))}")

meets_requirements = cycles_count >= min_cycles
number_cycle_requirement = cycles_count >= min_cycles
no_start_cycle_requirement = not any([1 in c for c in cycles])
if not no_start_cycle_requirement:
logger.info("Detected cycle containing start node")

meets_requirements = number_cycle_requirement and no_start_cycle_requirement

if meets_requirements:
logger.info("✅ Graph meets cycle requirements")
else:
Expand Down Expand Up @@ -285,6 +292,15 @@ def generate_and_validate(self, topic: str) -> PipelineResult:
error_type=ErrorType.SAMPLING_FAILED,
message="Failed to sample valid dialogues - not all utterances are present",
)
if is_greeting_repeated_regex(sampled_dialogues):
return GenerationError(
error_type=ErrorType.SAMPLING_FAILED, message="Failed to sample valid dialogues - Opening phrases are repeated"
)
if is_dialog_closed_too_early_regex(sampled_dialogues):
return GenerationError(
error_type=ErrorType.SAMPLING_FAILED,
message="Failed to sample valid dialogues - Closing phrases appear in the middle of a dialogue",
)

theme_validation = is_theme_valid(graph, self.theme_validation_model, topic)
if not theme_validation["value"]:
Expand Down
9 changes: 9 additions & 0 deletions dialogue2graph/metrics/llm_validators/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from .validators import (
    is_dialog_closed_too_early_emb_llm,
    is_greeting_repeated_emb_llm,
)

# __all__ must contain *strings*: `from package import *` looks each entry up
# by name, and non-string items raise "TypeError: Item in __all__ must be str".
__all__ = [
    "is_dialog_closed_too_early_emb_llm",
    "is_greeting_repeated_emb_llm",
]
187 changes: 187 additions & 0 deletions dialogue2graph/metrics/llm_validators/validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
"""
Validators
--------------------------
This module contains validators to evaluate dialogs

"""

from typing import List

from pydantic import BaseModel, Field

from dialogue2graph.pipelines.core.dialogue import Dialogue
from dialogue2graph.pipelines.model_storage import ModelStorage
from dialogue2graph.metrics.similarity import compare_strings

from langchain_core.language_models.chat_models import BaseChatModel
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser


# Reference opening utterances. Used as embedding anchors: an assistant turn
# whose distance to any of these falls under START_THRESHOLD is a candidate
# repeated greeting (then confirmed by an LLM check).
START_TURNS = [
    "Greetings! How can I assist you?",
    "Greetings! How can I help you?",
    "Greetings! Would you like to do this?",
    "Greetings! Could you tell me this?",
    "Hello! How can I assist you?",
    "Hello! How can I help you?",
    "Hello! Would you like to do this?",
    "Hello! Could you tell me this?",
    "Hi! How can I assist you?",
    "Hi! How can I help you?",
    "Hi! Would you like to do this?",
    "Hi! Could you tell me this?",
    "Welcome to our assistant service! How can I assist you?",
    "Welcome to our assistant service! How can I help you?",
    "Welcome to our assistant service! Would you like to do this?",
    "Welcome to our assistant service! Could you tell me this?",
]

# Reference closing utterances, used the same way with END_THRESHOLD to spot
# a dialog being closed before its final turn.
END_TURNS = [
    "Thank you for contacting us. Have a great day!",
    "You're welcome! Have a great day.",
    "Request confirmed. We're here to help if you have any other needs.",
    "You're welcome! Have a great day!",
    "Alright, if you need any further assistance, feel free to reach out. Have a great day!",
    "Alright, feel free to reach out if you need anything else. Have a great day!",
    "Alright, if you need anything else, feel free to reach out. Have a great day!",
    "I'm sorry to see you go. Your subscription has been canceled. If you have any feedback, feel free to reach out to us.",
    "Alright, if you have any other questions in the future, feel free to reach out. Have a great day!",
    "Alright, if you need any further assistance, feel free to reach out. Have a great presentation!",
]

# Embedding-distance thresholds passed to compare_strings as `embedder_th`.
# Chosen empirically (0.2 was found more precise than the initially measured
# 0.26/0.27) — presumably a *maximum distance* for two strings to count as
# similar; confirm against compare_strings semantics.
START_THRESHOLD = 0.2
END_THRESHOLD = 0.2


def _message_has_greeting_llm(model: BaseChatModel, text: str) -> bool:
    """Return True when the LLM judges *text* to be a conversation-opening greeting."""

    class OpeningValidation(BaseModel):
        # Structured output schema for the LLM's verdict.
        isOpening: bool = Field(description="Whether the given utterance is considered greeting or not")

    prompt = PromptTemplate(
        input_variables=["text"],
        template="""
You are given a dialog turn.
TURN: {text}
EVALUATE:
- Does the turn contain greeting phrases used to open a conversation?

Reply in JSON format:
{{"isOpening": true or false}}
""",
    )
    # prompt -> model -> parsed pydantic object, then read the boolean field.
    chain = prompt | model | PydanticOutputParser(pydantic_object=OpeningValidation)
    return chain.invoke({"text": text}).isOpening


def _message_has_closing_llm(model: BaseChatModel, text: str) -> bool:
    """Return True when the LLM judges *text* to be a conversation-closing phrase."""

    class ClosingValidation(BaseModel):
        # Structured output schema for the LLM's verdict.
        isClosing: bool = Field(description="Whether the given utterance is considered closing or not")

    prompt = PromptTemplate(
        input_variables=["text"],
        template="""
You are given a dialog turn.
TURN: {text}
EVALUATE:
- Does the turn contain phrases used to close a conversation?

Reply in JSON format:
{{"isClosing": true or false}}
""",
    )
    # prompt -> model -> parsed pydantic object, then read the boolean field.
    chain = prompt | model | PydanticOutputParser(pydantic_object=ClosingValidation)
    return chain.invoke({"text": text}).isClosing


def is_greeting_repeated_emb_llm(
    dialogs: List[Dialogue], model_storage: ModelStorage, embedder_name: str, llm_name: str, starts: list = None
) -> bool:
    """
    Checks if a greeting is repeated within dialogues using pairwise distance and LLM assessment.

    Args:
        dialogs (List[Dialogue]): Dialog list from graph.
        model_storage (ModelStorage): Model storage containing embedder and LLM model for evaluation.
        embedder_name (str): Name of embedder model in model storage (ModelStorage).
        llm_name (str): Name of LLM in model storage (ModelStorage).
        starts (list): List of opening phrases. Defaults to None, so standard opening phrases are used.

    Returns:
        bool: True if greeting has been repeated, False otherwise.

    Raises:
        KeyError: If the named model is missing from the storage.
        TypeError: If a named model has the wrong type ("emb"/"llm").
    """
    if not starts:
        starts = START_TURNS

    # Fetch each storage entry once instead of repeated .get() lookups.
    embedder_entry = model_storage.storage.get(embedder_name)
    if embedder_entry is None:
        raise KeyError(f"The embedder {embedder_name} not found in the given ModelStorage")
    if embedder_entry.model_type != "emb":
        raise TypeError(f"The {embedder_name} model is not an embedder")
    embedder_model = embedder_entry.model

    llm_entry = model_storage.storage.get(llm_name)
    if llm_entry is None:
        raise KeyError(f"The LLM {llm_name} not found in the given ModelStorage")
    if llm_entry.model_type != "llm":
        raise TypeError(f"The {llm_name} model is not an LLM")
    llm_model = llm_entry.model

    for dialog in dialogs:
        for i, message in enumerate(dialog.messages):
            # The opening turn (i == 0) is allowed to greet; only later
            # assistant turns are candidates for a *repeated* greeting.
            if i == 0 or message.participant != "assistant":
                continue
            # any() over a generator short-circuits: stop computing embedding
            # distances as soon as one reference start phrase is close enough.
            looks_like_start = any(
                compare_strings(start, message.text, embedder=embedder_model, embedder_th=START_THRESHOLD)
                for start in starts
            )
            # Cheap embedding filter first, LLM confirmation second.
            if looks_like_start and _message_has_greeting_llm(llm_model, message.text):
                return True

    return False


def is_dialog_closed_too_early_emb_llm(
    dialogs: List[Dialogue], model_storage: ModelStorage, embedder_name: str, llm_name: str, ends: list = None
) -> bool:
    """
    Checks if the assistant tried to close a dialogue in the middle using pairwise distance and LLM assessment.

    Args:
        dialogs (List[Dialogue]): Dialog list from graph.
        model_storage (ModelStorage): Model storage containing embedder and LLM model for evaluation.
        embedder_name (str): Name of embedder model in model storage (ModelStorage).
        llm_name (str): Name of LLM in model storage (ModelStorage).
        ends (list): List of closing phrases. Defaults to None, so standard closing phrases are used.

    Returns:
        bool: True if a closing phrase appeared before the final turn, False otherwise.

    Raises:
        KeyError: If the named model is missing from the storage.
        TypeError: If a named model has the wrong type ("emb"/"llm").
    """
    if not ends:
        ends = END_TURNS

    # Fetch each storage entry once instead of repeated .get() lookups.
    embedder_entry = model_storage.storage.get(embedder_name)
    if embedder_entry is None:
        raise KeyError(f"The embedder {embedder_name} not found in the given ModelStorage")
    if embedder_entry.model_type != "emb":
        raise TypeError(f"The {embedder_name} model is not an embedder")
    embedder_model = embedder_entry.model

    llm_entry = model_storage.storage.get(llm_name)
    if llm_entry is None:
        raise KeyError(f"The LLM {llm_name} not found in the given ModelStorage")
    if llm_entry.model_type != "llm":
        raise TypeError(f"The {llm_name} model is not an LLM")
    llm_model = llm_entry.model

    for dialog in dialogs:
        last_turn_idx = len(dialog.messages) - 1
        for i, message in enumerate(dialog.messages):
            # A closing phrase on the last turn is expected; only earlier
            # assistant turns count as closing "too early".
            if i == last_turn_idx or message.participant != "assistant":
                continue
            # any() over a generator short-circuits: stop computing embedding
            # distances as soon as one reference end phrase is close enough.
            looks_like_end = any(
                compare_strings(end, message.text, embedder=embedder_model, embedder_th=END_THRESHOLD)
                for end in ends
            )
            # Cheap embedding filter first, LLM confirmation second.
            if looks_like_end and _message_has_closing_llm(llm_model, message.text):
                return True

    return False
9 changes: 9 additions & 0 deletions dialogue2graph/metrics/no_llm_validators/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from .validators import (
    is_dialog_closed_too_early_regex,
    is_greeting_repeated_regex,
)

# __all__ must contain *strings*: `from package import *` looks each entry up
# by name, and non-string items raise "TypeError: Item in __all__ must be str".
__all__ = [
    "is_dialog_closed_too_early_regex",
    "is_greeting_repeated_regex",
]
56 changes: 56 additions & 0 deletions dialogue2graph/metrics/no_llm_validators/validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""
Validators
--------------------------
This module contains validators to evaluate dialogs

"""

from typing import List
import re

from dialogue2graph.pipelines.core.dialogue import Dialogue


def _message_has_greeting_re(regex: str, text: str) -> bool:
return bool(re.match(regex, text, flags=re.IGNORECASE))


def _message_has_closing_re(regex: str, text: str) -> bool:
return bool(re.search(regex, text, flags=re.IGNORECASE))


def is_greeting_repeated_regex(dialogs: List[Dialogue], regex: str = None) -> bool:
    """
    Checks if a greeting is repeated within dialogues using a regular expression.

    Args:
        dialogs (List[Dialogue]): Dialog list from graph.
        regex (str): Regular expression to find start turns. Defaults to None,
            so the standard regex is used.

    Returns:
        bool: True if greeting has been repeated, False otherwise.
    """
    if not regex:
        # Group the alternatives and require a word boundary so that ordinary
        # words starting with "hi"/"hello" (e.g. "Hint:", "History...") are
        # not mistaken for greetings.
        regex = r"^(?:hello|hi|greetings)\b"
    for dialog in dialogs:
        for i, message in enumerate(dialog.messages):
            # The opening turn (i == 0) is allowed to greet; flag only later
            # assistant turns that greet again.
            if i != 0 and message.participant == "assistant" and _message_has_greeting_re(regex, message.text):
                return True
    return False


def is_dialog_closed_too_early_regex(dialogs: List[Dialogue], regex: str = None) -> bool:
    """
    Checks if the assistant tried to close a dialogue in the middle using a regular expression.

    Args:
        dialogs (List[Dialogue]): Dialog list from graph.
        regex (str): Regular expression to find end turns. Defaults to None,
            so the standard regex is used.

    Returns:
        bool: True if closing appeared too early, False otherwise.
    """
    if not regex:
        # The previous default used an unescaped "." which matched any char
        # and *required* one trailing character, so "Have a great day" with no
        # punctuation was missed. \W*$ accepts optional trailing punctuation.
        regex = r"have a (?:great|good|nice) day\W*$|goodbye\W*$"
    for dialog in dialogs:
        last_turn_idx = len(dialog.messages) - 1
        for i, message in enumerate(dialog.messages):
            # A closing phrase on the final turn is expected; flag only
            # earlier assistant turns.
            if i != last_turn_idx and message.participant == "assistant" and _message_has_closing_re(regex, message.text):
                return True
    return False
29 changes: 29 additions & 0 deletions experiments/exp2025_04_03_selecting_emb_params/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
## Finding out best parameters for start and end loop detection

### Finding start and end nodes.ipynb

* Common start and end phrases were defined by frequency.
* Top 10 start phrases by frequency look too specific and contain specific detail on the dialog topic. To fix it
- common greetings and following questions were defined among most common openings,
- combinations of greeting and question were set as general start phrase set.
* Top 10 end phrases, being general enough, were chosen as the general end phrase set.

### Selecting base params for start loop finding.ipynb

**Finding embedder threshold to detect similar utterances**

* Pairwise distance is computed between defined set of START_TURNS (16 utterances chosen earlier) and all utterances in d2g_generated dialogs,
* Mean distance is counted for each pair (start_turn, dialog_utterance) and then the average is calculated across each start turn.
* Start phrases have mean distance around 0.26, whereas non-start have mean distance around 0.43. Thus, threshold can be set as 0.26.

Same process was repeated for END_TURNS. End phrases have mean distance around 0.27, and non-end phrases around 0.47. This threshold can be set slightly higher, at 0.27.

**Testing new validators on good and damaged dialogs from graph**

1. The graph examples were chosen - one for good graph, one for graph where opening phrase appears in the middle, one for graph where closing phrase appears in the middle
2. New LLM validators were used to evaluate the dialogs from the chosen graphs (start and end thresholds were lowered to 0.2, as the results were more precise with these thresholds).

### Extra findings

* There appear such phrases as "Certainly! How can I assist you today?" which look very alike to start turns. So a smarter LLM should be used to detect them as non-opening phrase.
* Some greeting phrases are too long (above 40 characters), so they are not very similar to the usual opening phrases, even when split, as they contain very specific info. These need to be analyzed.
19 changes: 19 additions & 0 deletions experiments/exp2025_04_03_selecting_emb_params/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
[project]
name = "exp2025-04-03-selecting-emb-params"
version = "0.1.0"
description = ""
authors = [
{name = "anna-a-m",email = "anna.micj@gmail.com"}
]
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
]

[tool.poetry]
packages = [{include = "exp2025_04_03_selecting_emb_params", from = "src"}]


[build-system]
requires = ["poetry-core>=2.0.0,<3.0.0"]
build-backend = "poetry.core.masonry.api"
Loading