Skip to content

Commit 907ae34

Browse files
yupeshYuriy PeshkichevNotBioWaste905
authored
Evaluation added (#42)
* Evaluation steps in the Pipeline * Feat/report exporter (#45) * Remove api_key from model storage configuration before saving * split tests in folders --------- Co-authored-by: Yuriy Peshkichev <peshkichev@gpu6.ipavlov.mipt.ru> Co-authored-by: NotBioWaste905 <andruhabobr@gmail.com>
1 parent e3ae051 commit 907ae34

File tree

76 files changed

+9245
-2671
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

76 files changed

+9245
-2671
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,4 @@ test.ipynb
1414
docs/build
1515
.ruff_cache
1616
.coverage
17-
htmlcov
17+
htmlcov

dialogue2graph/cli/commands/generate_graph_algo.py

Lines changed: 0 additions & 26 deletions
This file was deleted.
Lines changed: 59 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,73 @@
11
import json
22
from pathlib import Path
3-
from dialogue2graph.pipelines.d2g_extender.pipeline import Pipeline
3+
import datetime
4+
5+
6+
from dialogue2graph import metrics
7+
from dialogue2graph.pipelines.d2g_extender.pipeline import D2GExtenderPipeline
48
from dialogue2graph.pipelines.model_storage import ModelStorage
9+
from dialogue2graph.pipelines.helpers.parse_data import PipelineRawDataType
10+
511

612
ms = ModelStorage()
713

814

# Supported report file extensions mapped to the report method that writes them.
_REPORT_WRITERS = {
    ".json": "to_json",
    ".csv": "to_csv",
    ".html": "to_html",
    ".md": "to_markdown",
    ".txt": "to_text",
}


def _check_report_suffix(report_path) -> None:
    """Raise ValueError if report_path has an extension no report writer handles."""
    suffix = Path(report_path).suffix.lower()
    if suffix not in _REPORT_WRITERS:
        supported = ", ".join(sorted(_REPORT_WRITERS))
        raise ValueError(
            f"Unsupported report format '{suffix}' for {report_path}; "
            f"expected one of: {supported}"
        )


def _emit_graph(result_graph: dict, graph_path) -> None:
    """Write the graph as JSON to graph_path, or print it when no path is given."""
    payload = {"graph": result_graph}
    if graph_path is None:
        print(json.dumps(payload, indent=2, ensure_ascii=False))
        return
    Path(graph_path).parent.mkdir(parents=True, exist_ok=True)
    with open(graph_path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2, ensure_ascii=False)


def _save_report(report, report_path: Path) -> None:
    """Write report to report_path using the writer selected by its extension."""
    report_path.parent.mkdir(parents=True, exist_ok=True)
    writer_name = _REPORT_WRITERS[report_path.suffix.lower()]
    getattr(report, writer_name)(report_path)
    print("report saved to ", report_path)


def generate_extender(
    dialogs: str,
    graph: str,
    tgraph: str,
    enable_evals: bool,
    config: dict,
    graph_path: str,
    report_path: str,
):
    """Generate a graph from dialogs via the d2g_extender pipeline.

    The resulting graph (its "nodes" and "edges") is written as JSON to
    ``graph_path``, or printed to stdout when ``graph_path`` is None. The
    pipeline report is written to ``report_path``; when ``report_path`` is
    None the report is printed and saved to a timestamped JSON file in the
    current directory.

    Args:
        dialogs: Dialogs input, passed to the pipeline as ``dialogs``.
        graph: Graph to extend, passed to the pipeline as ``supported_graph``.
        tgraph: True graph, passed to the pipeline as ``true_graph``.
        enable_evals: Forwarded to ``pipeline.invoke`` as ``enable_evals``.
        config: Model storage configuration; loaded unless it equals ``{}``.
            NOTE(review): the CLI appears to pass a config file path string
            here rather than a dict -- confirm what ``ModelStorage.load``
            expects.
        graph_path: Output file for the graph, or None to print it.
        report_path: Output file for the report (.json, .csv, .html, .md or
            .txt), or None for the default timestamped JSON file.

    Raises:
        ValueError: If ``report_path`` has an unsupported extension.
    """
    # Fail before the pipeline runs: previously an unsupported extension
    # silently wrote nothing while still announcing that the report was saved.
    if report_path is not None:
        _check_report_suffix(report_path)

    if config != {}:
        ms.load(config)

    pipeline = D2GExtenderPipeline(
        "d2g_ext",
        ms,
        step1_evals=metrics.PreDGEvalBase,
        extender_evals=metrics.PreDGEvalBase,
        step2_evals=metrics.DGEvalBase,
        end_evals=metrics.DGEvalBase,
        step=1,
    )

    raw_data = PipelineRawDataType(
        dialogs=dialogs, supported_graph=graph, true_graph=tgraph
    )
    result, report = pipeline.invoke(raw_data, enable_evals=enable_evals)
    result_graph = {
        "nodes": result.graph_dict["nodes"],
        "edges": result.graph_dict["edges"],
    }
    _emit_graph(result_graph, graph_path)

    if report_path is None:
        print(str(report))
        now = datetime.datetime.now()
        report_path = (
            f"./report_{pipeline.name}_{now.strftime('%Y-%m-%d_%H-%M-%S')}.json"
        )
    _save_report(report, Path(report_path))
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import json
2+
from pathlib import Path
3+
import datetime
4+
5+
from dialogue2graph import metrics
6+
from dialogue2graph.pipelines.d2g_light.pipeline import D2GLightPipeline
7+
from dialogue2graph.pipelines.helpers.parse_data import PipelineRawDataType
8+
from dialogue2graph.pipelines.model_storage import ModelStorage
9+
10+
ms = ModelStorage()
11+
12+
# Supported report file extensions mapped to the report method that writes them.
_REPORT_WRITERS = {
    ".json": "to_json",
    ".csv": "to_csv",
    ".html": "to_html",
    ".md": "to_markdown",
    ".txt": "to_text",
}


def _check_report_suffix(report_path) -> None:
    """Raise ValueError if report_path has an extension no report writer handles."""
    suffix = Path(report_path).suffix.lower()
    if suffix not in _REPORT_WRITERS:
        supported = ", ".join(sorted(_REPORT_WRITERS))
        raise ValueError(
            f"Unsupported report format '{suffix}' for {report_path}; "
            f"expected one of: {supported}"
        )


def _emit_graph(result_graph: dict, graph_path) -> None:
    """Write the graph as JSON to graph_path, or print it when no path is given."""
    payload = {"graph": result_graph}
    if graph_path is None:
        print(json.dumps(payload, indent=2, ensure_ascii=False))
        return
    Path(graph_path).parent.mkdir(parents=True, exist_ok=True)
    with open(graph_path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2, ensure_ascii=False)


def _save_report(report, report_path: Path) -> None:
    """Write report to report_path using the writer selected by its extension."""
    report_path.parent.mkdir(parents=True, exist_ok=True)
    writer_name = _REPORT_WRITERS[report_path.suffix.lower()]
    getattr(report, writer_name)(report_path)
    print("report saved to ", report_path)


def generate_light(
    dialogs: str,
    tgraph: str,
    enable_evals: bool,
    config: dict,
    graph_path: str,
    report_path: str,
):
    """Generate a graph from dialogs via the d2g_light pipeline.

    The resulting graph (its "nodes" and "edges") is written as JSON to
    ``graph_path``, or printed to stdout when ``graph_path`` is None. The
    pipeline report is written to ``report_path``; when ``report_path`` is
    None the report is printed and saved to a timestamped JSON file in the
    current directory.

    Args:
        dialogs: Dialogs input, passed to the pipeline as ``dialogs``.
        tgraph: True graph, passed to the pipeline as ``true_graph``.
        enable_evals: Forwarded to ``pipeline.invoke`` as ``enable_evals``.
        config: Model storage configuration; loaded unless it equals ``{}``.
            NOTE(review): the CLI appears to pass a config file path string
            here rather than a dict -- confirm what ``ModelStorage.load``
            expects.
        graph_path: Output file for the graph, or None to print it.
        report_path: Output file for the report (.json, .csv, .html, .md or
            .txt), or None for the default timestamped JSON file.

    Raises:
        ValueError: If ``report_path`` has an unsupported extension.
    """
    # Fail before the pipeline runs: previously an unsupported extension
    # silently wrote nothing while still announcing that the report was saved.
    if report_path is not None:
        _check_report_suffix(report_path)

    if config != {}:
        ms.load(config)
    pipeline = D2GLightPipeline(
        "d2g_light", ms, step2_evals=metrics.DGEvalBase, end_evals=metrics.DGEvalBase
    )

    raw_data = PipelineRawDataType(dialogs=dialogs, true_graph=tgraph)
    result, report = pipeline.invoke(raw_data, enable_evals=enable_evals)
    result_graph = {
        "nodes": result.graph_dict["nodes"],
        "edges": result.graph_dict["edges"],
    }
    _emit_graph(result_graph, graph_path)

    if report_path is None:
        print(str(report))
        now = datetime.datetime.now()
        report_path = (
            f"./report_{pipeline.name}_{now.strftime('%Y-%m-%d_%H-%M-%S')}.json"
        )
    _save_report(report, Path(report_path))
Lines changed: 48 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,62 @@
11
import json
22
from pathlib import Path
3-
from dialogue2graph.pipelines.d2g_llm.pipeline import Pipeline
3+
import datetime
4+
5+
from dialogue2graph import metrics
6+
from dialogue2graph.pipelines.d2g_llm.pipeline import D2GLLMPipeline
7+
from dialogue2graph.pipelines.helpers.parse_data import PipelineRawDataType
48
from dialogue2graph.pipelines.model_storage import ModelStorage
59

610
ms = ModelStorage()
711

812

# Supported report file extensions mapped to the report method that writes them.
_REPORT_WRITERS = {
    ".json": "to_json",
    ".csv": "to_csv",
    ".html": "to_html",
    ".md": "to_markdown",
    ".txt": "to_text",
}


def _check_report_suffix(report_path) -> None:
    """Raise ValueError if report_path has an extension no report writer handles."""
    suffix = Path(report_path).suffix.lower()
    if suffix not in _REPORT_WRITERS:
        supported = ", ".join(sorted(_REPORT_WRITERS))
        raise ValueError(
            f"Unsupported report format '{suffix}' for {report_path}; "
            f"expected one of: {supported}"
        )


def _emit_graph(result_graph: dict, graph_path) -> None:
    """Write the graph as JSON to graph_path, or print it when no path is given."""
    payload = {"graph": result_graph}
    if graph_path is None:
        print(json.dumps(payload, indent=2, ensure_ascii=False))
        return
    Path(graph_path).parent.mkdir(parents=True, exist_ok=True)
    with open(graph_path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2, ensure_ascii=False)


def _save_report(report, report_path: Path) -> None:
    """Write report to report_path using the writer selected by its extension."""
    report_path.parent.mkdir(parents=True, exist_ok=True)
    writer_name = _REPORT_WRITERS[report_path.suffix.lower()]
    getattr(report, writer_name)(report_path)
    print("report saved to ", report_path)


def generate_llm(
    dialogs: str,
    tgraph: str,
    enable_evals: bool,
    config: dict,
    graph_path: str,
    report_path: str,
):
    """Generate a graph from dialogs via the d2g_llm pipeline.

    The resulting graph (its "nodes" and "edges") is written as JSON to
    ``graph_path``, or printed to stdout when ``graph_path`` is None. The
    pipeline report is written to ``report_path``; when ``report_path`` is
    None the report is printed and saved to a timestamped JSON file in the
    current directory.

    Args:
        dialogs: Dialogs input, passed to the pipeline as ``dialogs``.
        tgraph: True graph, passed to the pipeline as ``true_graph``.
        enable_evals: Forwarded to ``pipeline.invoke`` as ``enable_evals``.
        config: Model storage configuration; loaded unless it equals ``{}``.
            NOTE(review): the CLI appears to pass a config file path string
            here rather than a dict -- confirm what ``ModelStorage.load``
            expects.
        graph_path: Output file for the graph, or None to print it.
        report_path: Output file for the report (.json, .csv, .html, .md or
            .txt), or None for the default timestamped JSON file.

    Raises:
        ValueError: If ``report_path`` has an unsupported extension.
    """
    # Fail before the pipeline runs: previously an unsupported extension
    # silently wrote nothing while still announcing that the report was saved.
    if report_path is not None:
        _check_report_suffix(report_path)

    if config != {}:
        ms.load(config)

    pipeline = D2GLLMPipeline(
        "d2g_llm", ms, step2_evals=metrics.DGEvalBase, end_evals=metrics.DGEvalBase
    )

    raw_data = PipelineRawDataType(dialogs=dialogs, true_graph=tgraph)
    result, report = pipeline.invoke(raw_data, enable_evals=enable_evals)
    result_graph = {
        "nodes": result.graph_dict["nodes"],
        "edges": result.graph_dict["edges"],
    }
    _emit_graph(result_graph, graph_path)

    if report_path is None:
        print(str(report))
        now = datetime.datetime.now()
        report_path = (
            f"./report_{pipeline.name}_{now.strftime('%Y-%m-%d_%H-%M-%S')}.json"
        )
    _save_report(report, Path(report_path))

dialogue2graph/cli/main.py

Lines changed: 61 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import click
22
from dotenv import load_dotenv
33
from .commands.generate_data import generate_data
4-
from .commands.generate_graph_algo import generate_algo
4+
from .commands.generate_graph_light import generate_light
55
from .commands.generate_graph_llm import generate_llm
66
from .commands.generate_graph_extender import generate_extender
77

@@ -26,35 +26,76 @@ def gen_data(env: str, cfg: str, topic: str, output: str):
@cli.command()
@click.option("--env", "-e", help="Path to .env file", default=".env")
@click.option("--cfg", "-c", help="Path to cfg.yml file", default="cfg.yml")
@click.option("--dialogs", "-d", help="Input dialogs file", required=True)
@click.option("--tgraph", "-tg", help="Input true graph file", required=False)
@click.option("--output", "-o", help="Output graph file", required=False)
@click.option("--report", "-r", help="Output report file", required=False)
@click.option("--eval", "-ev", is_flag=True, help="Call pipeline evals", required=False)
def gen_graph_light(
    env: str, cfg: str, dialogs: str, tgraph: str, output: str, report: str, eval: bool
):
    """Generate graph from dialogs data via d2g_light pipeline"""
    # The docstring above is the command's --help text, so it is kept to one
    # line; it previously named the removed d2g_algo pipeline.
    #
    # `eval` shadows the builtin, but the parameter name is derived from the
    # --eval option and must match it for click to bind the flag.
    #
    # Environment variables are loaded from the .env file before the pipeline
    # is started.
    load_dotenv(env)
    # --output and --report are optional and arrive as None when omitted;
    # generate_light is called with them unchanged.
    generate_light(
        dialogs=dialogs,
        tgraph=tgraph,
        enable_evals=eval,
        config=cfg,
        graph_path=output,
        report_path=report,
    )
3547

3648

@cli.command()
@click.option("--env", "-e", default=".env", help="Path to .env file")
@click.option("--cfg", "-c", default="cfg.yml", help="Path to cfg.yml file")
@click.option("--dialogs", "-d", required=True, help="Input dialogs file")
@click.option("--tgraph", "-tg", required=False, help="Input true graph file")
@click.option("--output", "-o", required=False, help="Output graph file")
@click.option("--report", "-r", required=False, help="Output report file")
@click.option("--eval", "-ev", required=False, is_flag=True, help="Call pipeline evals")
def gen_graph_llm(
    env: str,
    cfg: str,
    dialogs: str,
    tgraph: str,
    output: str,
    report: str,
    eval: bool,
):
    """Generate graph from dialogs data via d2g_llm pipeline"""
    # The docstring above is the command's --help text, so it stays a single
    # line. `eval` shadows the builtin, but the name is derived from --eval.
    load_dotenv(env)

    # Map CLI option values onto the keyword arguments of generate_llm.
    # Optional options that were not supplied are passed through as None.
    pipeline_args = {
        "dialogs": dialogs,
        "tgraph": tgraph,
        "enable_evals": eval,
        "config": cfg,
        "graph_path": output,
        "report_path": report,
    }
    generate_llm(**pipeline_args)
4670

4771

@cli.command()
@click.option("--env", "-e", help="Path to .env file", default=".env")
@click.option("--cfg", "-c", help="Path to cfg.yml file", default="cfg.yml")
@click.option("--dialogs", "-d", help="Input dialogs file", required=True)
@click.option("--graph", "-g", help="Input graph file", required=False)
@click.option("--tgraph", "-tg", help="Input true graph file", required=False)
@click.option("--output", "-o", help="Output graph file", required=False)
@click.option("--report", "-r", help="Output report file", required=False)
@click.option("--eval", "-ev", is_flag=True, help="Call pipeline evals", required=False)
def gen_graph_extender(
    env: str,
    cfg: str,
    dialogs: str,
    graph: str,
    tgraph: str,
    output: str,
    report: str,
    eval: bool,
):
    """Generate graph from dialogs data via d2g_extender pipeline"""
    # The docstring above is the command's --help text, so it is kept to one
    # line; it previously named the d2g_llm pipeline by mistake.
    #
    # `eval` shadows the builtin, but the parameter name is derived from the
    # --eval option and must match it for click to bind the flag.
    #
    # Environment variables are loaded from the .env file before the pipeline
    # is started.
    load_dotenv(env)
    # --graph, --tgraph, --output and --report are optional and arrive as None
    # when omitted; generate_extender is called with them unchanged.
    generate_extender(
        dialogs=dialogs,
        graph=graph,
        tgraph=tgraph,
        enable_evals=eval,
        config=cfg,
        graph_path=output,
        report_path=report,
    )

dialogue2graph/datasets/complex_dialogues/find_graph_ends.py renamed to dialogue2graph/datasets/complex_dialogues/find_cycle_ends.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,17 @@
22
from pydantic import BaseModel, Field
33
from langchain_core.output_parsers import PydanticOutputParser
44
from langchain.prompts import PromptTemplate
5-
from langchain_core.language_models.chat_models import BaseChatModel
65
from dialogue2graph.pipelines.core.graph import Graph
6+
from langchain_core.language_models.chat_models import BaseChatModel
77

88

9-
def find_graph_ends(G: Graph, model: BaseChatModel) -> dict[str]:
9+
def find_cycle_ends(G: Graph, cycle_ends_model: BaseChatModel) -> dict[str]:
1010
"""
1111
To find nodes in dialogue graph G by condition in graph_ends_prompt_template with help of model.
1212
1313
Parameters:
1414
G (BaseGraph): The dialogue graph
15-
model (BaseChatModel): The LLM model to be used
15+
cycle_ends_model (BaseChatModel): The LLM model to be used
1616
1717
Returns:
1818
dict: {'value': bool, 'description': str}
@@ -52,7 +52,7 @@ class GraphEndsResult(BaseModel):
5252
"json_graph": graph_json,
5353
}
5454

55-
find_ends_chain = graph_ends_prompt | model | parser
55+
find_ends_chain = graph_ends_prompt | cycle_ends_model | parser
5656
response = find_ends_chain.invoke(input_data)
5757
result = {"value": response.ends, "description": response.description}
5858

0 commit comments

Comments
 (0)