deeppavlov
diff --git a/.gitignore b/.gitignore
Lines changed: 1 addition & 1 deletion
diff --git a/dialogue2graph/cli/commands/generate_graph_algo.py b/dialogue2graph/cli/commands/generate_graph_algo.py
Lines changed: 0 additions & 26 deletions
diff --git a/dialogue2graph/cli/commands/generate_graph_extender.py b/dialogue2graph/cli/commands/generate_graph_extender.py
Lines changed: 59 additions & 12 deletions
diff --git a/dialogue2graph/cli/commands/generate_graph_light.py b/dialogue2graph/cli/commands/generate_graph_light.py
Lines changed: 61 additions & 0 deletions
diff --git a/dialogue2graph/cli/commands/generate_graph_llm.py b/dialogue2graph/cli/commands/generate_graph_llm.py
Lines changed: 48 additions & 11 deletions
diff --git a/dialogue2graph/cli/main.py b/dialogue2graph/cli/main.py
Lines changed: 61 additions & 20 deletions
diff --git a/dialogue2graph/datasets/complex_dialogues/find_graph_ends.py b/dialogue2graph/datasets/complex_dialogues/find_cycle_ends.py
dialogue2graph/datasets/complex_dialogues/find_graph_ends.py renamed to dialogue2graph/datasets/complex_dialogues/find_cycle_ends.py
Lines changed: 4 additions & 4 deletions
@@ -14,4 +14,4 @@ test.ipynb
 docs/build
 .ruff_cache
 .coverage
-htmlcov
+htmlcov
@@ -1,26 +1,73 @@
 import json
 from pathlib import Path
-from dialogue2graph.pipelines.d2g_extender.pipeline import Pipeline
+import datetime
+
+
+from dialogue2graph import metrics
+from dialogue2graph.pipelines.d2g_extender.pipeline import D2GExtenderPipeline
 from dialogue2graph.pipelines.model_storage import ModelStorage
+from dialogue2graph.pipelines.helpers.parse_data import PipelineRawDataType
+
 
 ms = ModelStorage()
 
 
-def generate_extender(dialogues: str, config: Path, output_path: str):
-    """Generates graph from dialogues via d2g_llm pipeline using parameters from config
+def generate_extender(
+    dialogs: str,
+    graph: str,
+    tgraph: str,
+    enable_evals: bool,
+    config: dict,
+    graph_path: str,
+    report_path: str,
+):
+    """Generates graph from dialogs via d2g_extender pipeline using parameters from config
     and saves graph dictionary to output_path"""
 
     if config != {}:
         ms.load(config)
 
-    pipeline = Pipeline(ms)
-
-    result = pipeline.invoke(dialogues)
-    print("Result:", result.graph_dict)
+    pipeline = D2GExtenderPipeline(
+        "d2g_ext",
+        ms,
+        step1_evals=metrics.PreDGEvalBase,
+        extender_evals=metrics.PreDGEvalBase,
+        step2_evals=metrics.DGEvalBase,
+        end_evals=metrics.DGEvalBase,
+        step=1,
+    )
 
-    # Save results
-    output_file = Path(output_path)
-    output_file.parent.mkdir(parents=True, exist_ok=True)
+    raw_data = PipelineRawDataType(
+        dialogs=dialogs, supported_graph=graph, true_graph=tgraph
+    )
+    result, report = pipeline.invoke(raw_data, enable_evals=enable_evals)
+    result_graph = {
+        "nodes": result.graph_dict["nodes"],
+        "edges": result.graph_dict["edges"],
+    }
 
-    with open(output_file, "w", encoding="utf-8") as f:
-        json.dump(result.graph_dict, f, indent=2, ensure_ascii=False)
+    if graph_path is not None:
+        Path(graph_path).parent.mkdir(parents=True, exist_ok=True)
+        with open(graph_path, "w", encoding="utf-8") as f:
+            json.dump({"graph": result_graph}, f, indent=2, ensure_ascii=False)
+    else:
+        print(json.dumps({"graph": result_graph}, indent=2, ensure_ascii=False))
+    if report_path is None:
+        print(str(report))
+        now = datetime.datetime.now()
+        report_path = (
+            f"./report_{pipeline.name}_{now.strftime('%Y-%m-%d_%H-%M-%S')}.json"
+        )
+    report_path = Path(report_path)
+    report_path.parent.mkdir(parents=True, exist_ok=True)
+    if report_path.suffix == ".json":
+        report.to_json(report_path)
+    elif report_path.suffix == ".csv":
+        report.to_csv(report_path)
+    elif report_path.suffix == ".html":
+        report.to_html(report_path)
+    elif report_path.suffix == ".md":
+        report.to_markdown(report_path)
+    elif report_path.suffix == ".txt":
+        report.to_text(report_path)
+    print("report saved to ", report_path)
@@ -0,0 +1,61 @@
+import json
+from pathlib import Path
+import datetime
+
+from dialogue2graph import metrics
+from dialogue2graph.pipelines.d2g_light.pipeline import D2GLightPipeline
+from dialogue2graph.pipelines.helpers.parse_data import PipelineRawDataType
+from dialogue2graph.pipelines.model_storage import ModelStorage
+
+ms = ModelStorage()
+
+
+def generate_light(
+    dialogs: str,
+    tgraph: str,
+    enable_evals: bool,
+    config: dict,
+    graph_path: str,
+    report_path: str,
+):
+    """Generates graph from dialogs via d2g_light pipeline using parameters from config
+    and saves graph dictionary to graph_path (printed to stdout when graph_path is None)"""
+
+    if config != {}:
+        ms.load(config)
+    pipeline = D2GLightPipeline(
+        "d2g_light", ms, step2_evals=metrics.DGEvalBase, end_evals=metrics.DGEvalBase
+    )
+
+    raw_data = PipelineRawDataType(dialogs=dialogs, true_graph=tgraph)
+    result, report = pipeline.invoke(raw_data, enable_evals=enable_evals)
+    result_graph = {
+        "nodes": result.graph_dict["nodes"],
+        "edges": result.graph_dict["edges"],
+    }
+
+    if graph_path is not None:
+        Path(graph_path).parent.mkdir(parents=True, exist_ok=True)
+        with open(graph_path, "w", encoding="utf-8") as f:
+            json.dump({"graph": result_graph}, f, indent=2, ensure_ascii=False)
+    else:
+        print(json.dumps({"graph": result_graph}, indent=2, ensure_ascii=False))
+    if report_path is None:
+        print(str(report))
+        now = datetime.datetime.now()
+        report_path = (
+            f"./report_{pipeline.name}_{now.strftime('%Y-%m-%d_%H-%M-%S')}.json"
+        )
+    report_path = Path(report_path)
+    report_path.parent.mkdir(parents=True, exist_ok=True)
+    if report_path.suffix == ".json":
+        report.to_json(report_path)
+    elif report_path.suffix == ".csv":
+        report.to_csv(report_path)
+    elif report_path.suffix == ".html":
+        report.to_html(report_path)
+    elif report_path.suffix == ".md":
+        report.to_markdown(report_path)
+    elif report_path.suffix == ".txt":
+        report.to_text(report_path)
+    print("report saved to ", report_path)
@@ -1,25 +1,62 @@
 import json
 from pathlib import Path
-from dialogue2graph.pipelines.d2g_llm.pipeline import Pipeline
+import datetime
+
+from dialogue2graph import metrics
+from dialogue2graph.pipelines.d2g_llm.pipeline import D2GLLMPipeline
+from dialogue2graph.pipelines.helpers.parse_data import PipelineRawDataType
 from dialogue2graph.pipelines.model_storage import ModelStorage
 
 ms = ModelStorage()
 
 
-def generate_llm(dialogues: str, config: Path, output_path: str):
-    """Generates graph from dialogues via d2g_llm pipeline using parameters from config
+def generate_llm(
+    dialogs: str,
+    tgraph: str,
+    enable_evals: bool,
+    config: dict,
+    graph_path: str,
+    report_path: str,
+):
+    """Generates graph from dialogs via d2g_llm pipeline using parameters from config
     and saves graph dictionary to output_path"""
 
     if config != {}:
         ms.load(config)
-    pipeline = Pipeline(ms)
 
-    result = pipeline.invoke(dialogues)
-    print("Result:", result.graph_dict)
+    pipeline = D2GLLMPipeline(
+        "d2g_llm", ms, step2_evals=metrics.DGEvalBase, end_evals=metrics.DGEvalBase
+    )
 
-    # Save results
-    output_file = Path(output_path)
-    output_file.parent.mkdir(parents=True, exist_ok=True)
+    raw_data = PipelineRawDataType(dialogs=dialogs, true_graph=tgraph)
+    result, report = pipeline.invoke(raw_data, enable_evals=enable_evals)
+    result_graph = {
+        "nodes": result.graph_dict["nodes"],
+        "edges": result.graph_dict["edges"],
+    }
 
-    with open(output_file, "w", encoding="utf-8") as f:
-        json.dump(result.graph_dict, f, indent=2, ensure_ascii=False)
+    if graph_path is not None:
+        Path(graph_path).parent.mkdir(parents=True, exist_ok=True)
+        with open(graph_path, "w", encoding="utf-8") as f:
+            json.dump({"graph": result_graph}, f, indent=2, ensure_ascii=False)
+    else:
+        print(json.dumps({"graph": result_graph}, indent=2, ensure_ascii=False))
+    if report_path is None:
+        print(str(report))
+        now = datetime.datetime.now()
+        report_path = (
+            f"./report_{pipeline.name}_{now.strftime('%Y-%m-%d_%H-%M-%S')}.json"
+        )
+    report_path = Path(report_path)
+    report_path.parent.mkdir(parents=True, exist_ok=True)
+    if report_path.suffix == ".json":
+        report.to_json(report_path)
+    elif report_path.suffix == ".csv":
+        report.to_csv(report_path)
+    elif report_path.suffix == ".html":
+        report.to_html(report_path)
+    elif report_path.suffix == ".md":
+        report.to_markdown(report_path)
+    elif report_path.suffix == ".txt":
+        report.to_text(report_path)
+    print("report saved to ", report_path)
@@ -1,7 +1,7 @@
 import click
 from dotenv import load_dotenv
 from .commands.generate_data import generate_data
-from .commands.generate_graph_algo import generate_algo
+from .commands.generate_graph_light import generate_light
 from .commands.generate_graph_llm import generate_llm
 from .commands.generate_graph_extender import generate_extender
 
@@ -26,35 +26,76 @@ def gen_data(env: str, cfg: str, topic: str, output: str):
 @cli.command()
 @click.option("--env", "-e", help="Path to .env file", default=".env")
 @click.option("--cfg", "-c", help="Path to cfg.yml file", default="cfg.yml")
-@click.option("--dialogues", "-d", help="Input dialogues file", required=True)
-@click.option("--output", "-o", help="Output graph file", required=True)
-def gen_graph_algo(env: str, cfg: str, dialogues: str, output: str):
-    """Generate graph from dialogues data via d2g_algo pipeline"""
+@click.option("--dialogs", "-d", help="Input dialogs file", required=True)
+@click.option("--tgraph", "-tg", help="Input true graph file", required=False)
+@click.option("--output", "-o", help="Output graph file", required=False)
+@click.option("--report", "-r", help="Output report file", required=False)
+@click.option("--eval", "-ev", is_flag=True, help="Call pipeline evals", required=False)
+def gen_graph_light(
+    env: str, cfg: str, dialogs: str, tgraph: str, output: str, report: str, eval: bool
+):
+    """Generate graph from dialogs data via d2g_light pipeline"""
     load_dotenv(env)
-    generate_algo(dialogues, cfg, output)
+    generate_light(
+        dialogs=dialogs,
+        tgraph=tgraph,
+        enable_evals=eval,
+        config=cfg,
+        graph_path=output,
+        report_path=report,
+    )
 
 
 @cli.command()
 @click.option("--env", "-e", help="Path to .env file", default=".env")
 @click.option("--cfg", "-c", help="Path to cfg.yml file", default="cfg.yml")
-@click.option("--dialogues", "-d", help="Input dialogues file", required=True)
-@click.option("--output", "-o", help="Output graph file", required=True)
-def gen_graph_llm(env: str, cfg: str, dialogues: str, output: str):
-    """Generate graph from dialogues data via d2g_llm pipeline"""
+@click.option("--dialogs", "-d", help="Input dialogs file", required=True)
+@click.option("--tgraph", "-tg", help="Input true graph file", required=False)
+@click.option("--output", "-o", help="Output graph file", required=False)
+@click.option("--report", "-r", help="Output report file", required=False)
+@click.option("--eval", "-ev", is_flag=True, help="Call pipeline evals", required=False)
+def gen_graph_llm(
+    env: str, cfg: str, dialogs: str, tgraph: str, output: str, report: str, eval: bool
+):
+    """Generate graph from dialogs data via d2g_llm pipeline"""
     load_dotenv(env)
-    generate_llm(dialogues, cfg, output)
+    generate_llm(
+        dialogs=dialogs,
+        tgraph=tgraph,
+        enable_evals=eval,
+        config=cfg,
+        graph_path=output,
+        report_path=report,
+    )
 
 
 @cli.command()
 @click.option("--env", "-e", help="Path to .env file", default=".env")
 @click.option("--cfg", "-c", help="Path to cfg.yml file", default="cfg.yml")
-@click.option("--dialogues", "-d", help="Input dialogues file", required=True)
-@click.option("--output", "-o", help="Output graph file", required=True)
-def gen_graph_extender(env: str, cfg: str, dialogues: str, output: str):
-    """Generate graph from dialogues data via d2g_llm pipeline"""
+@click.option("--dialogs", "-d", help="Input dialogs file", required=True)
+@click.option("--graph", "-g", help="Input graph file", required=False)
+@click.option("--tgraph", "-tg", help="Input true graph file", required=False)
+@click.option("--output", "-o", help="Output graph file", required=False)
+@click.option("--report", "-r", help="Output report file", required=False)
+@click.option("--eval", "-ev", is_flag=True, help="Call pipeline evals", required=False)
+def gen_graph_extender(
+    env: str,
+    cfg: str,
+    dialogs: str,
+    graph: str,
+    tgraph: str,
+    output: str,
+    report: str,
+    eval: bool,
+):
+    """Generate graph from dialogs data via d2g_extender pipeline"""
     load_dotenv(env)
-    generate_extender(dialogues, cfg, output)
-
-
-if __name__ == "__main__":
-    cli()
+    generate_extender(
+        dialogs=dialogs,
+        graph=graph,
+        tgraph=tgraph,
+        enable_evals=eval,
+        config=cfg,
+        graph_path=output,
+        report_path=report,
+    )
@@ -2,17 +2,17 @@
 from pydantic import BaseModel, Field
 from langchain_core.output_parsers import PydanticOutputParser
 from langchain.prompts import PromptTemplate
-from langchain_core.language_models.chat_models import BaseChatModel
 from dialogue2graph.pipelines.core.graph import Graph
+from langchain_core.language_models.chat_models import BaseChatModel
 
 
-def find_graph_ends(G: Graph, model: BaseChatModel) -> dict[str]:
+def find_cycle_ends(G: Graph, cycle_ends_model: BaseChatModel) -> dict[str]:
     """
     To find nodes in dialogue graph G by condition in graph_ends_prompt_template with help of model.
 
     Parameters:
         G (BaseGraph): The dialogue graph
-        model (BaseChatModel): The LLM model to be used
+        cycle_ends_model (BaseChatModel): The LLM model to be used
 
     Returns:
         dict: {'value': bool, 'description': str}
@@ -52,7 +52,7 @@ class GraphEndsResult(BaseModel):
         "json_graph": graph_json,
     }
 
-    find_ends_chain = graph_ends_prompt | model | parser
+    find_ends_chain = graph_ends_prompt | cycle_ends_model | parser
     response = find_ends_chain.invoke(input_data)
     result = {"value": response.ends, "description": response.description}