From 7000659a6e753c96474ed63fe61810b515b725c2 Mon Sep 17 00:00:00 2001 From: jasinliu <939282975@qq.com> Date: Thu, 10 Oct 2024 14:05:58 +0800 Subject: [PATCH 1/3] ragas --- hugegraph-llm/requirements.txt | 1 + .../src/hugegraph_llm/demo/rag_demo/app.py | 4 +- .../hugegraph_llm/demo/rag_demo/rag_block.py | 141 +++++++++--- .../operators/common_op/merge_dedup_rerank.py | 30 ++- .../hugegraph_llm/operators/graph_rag_task.py | 12 -- .../operators/llm_op/answer_synthesize.py | 202 ++++++++++++------ .../resources/demo/questions_template.xlsx | Bin 11747 -> 6387 bytes .../src/hugegraph_llm/utils/ragas_utils.py | 19 ++ hugegraph-python-client/requirements.txt | 2 +- 9 files changed, 286 insertions(+), 125 deletions(-) create mode 100644 hugegraph-llm/src/hugegraph_llm/utils/ragas_utils.py diff --git a/hugegraph-llm/requirements.txt b/hugegraph-llm/requirements.txt index e10cb22f6..8f4f1bb52 100644 --- a/hugegraph-llm/requirements.txt +++ b/hugegraph-llm/requirements.txt @@ -14,3 +14,4 @@ python-dotenv>=1.0.1 pyarrow~=17.0.0 # TODO: a temporary dependency for pandas, figure out why ImportError pandas~=2.2.2 openpyxl~=3.1.5 +ragas~=0.1.20 diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py index 0b5285aa0..02816275c 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py @@ -65,9 +65,9 @@ def init_rag_ui() -> gr.Interface: with gr.Tab(label="1. Build RAG Index 💡"): create_vector_graph_block() - with gr.Tab(label="2. (Graph)RAG & User Functions 📖"): + with gr.Tab(label="2,3. (Graph)RAG & User Functions 📖"): create_rag_block() - with gr.Tab(label="3. Others Tools 🚧"): + with gr.Tab(label="4. Others Tools 🚧"): create_other_block() return hugegraph_llm_ui diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py index f514a8333..62700d400 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py @@ -17,16 +17,20 @@ # pylint: disable=E1101 +import json import os -from typing import Tuple, Literal, Optional +from typing import Tuple, List, Literal, Optional +from datasets import Dataset import gradio as gr -import pandas as pd from gradio.utils import NamedString +import pandas as pd +from ragas import evaluate from hugegraph_llm.config import resource_path, prompt from hugegraph_llm.operators.graph_rag_task import RAGPipeline from hugegraph_llm.utils.log import log +from hugegraph_llm.utils.ragas_utils import RAGAS_METRICS_DICT def rag_answer( @@ -69,17 +73,29 @@ def rag_answer( rag.extract_keywords().keywords_to_vid().query_graphdb() # TODO: add more user-defined search strategies rag.merge_dedup_rerank(graph_ratio, rerank_method, near_neighbor_first, custom_related_information) - rag.synthesize_answer(raw_answer, vector_only_answer, graph_only_answer, graph_vector_answer, answer_prompt) + rag.synthesize_answer(answer_prompt) try: - context = rag.run(verbose=True, query=text, vector_search=vector_search, graph_search=graph_search) + context = rag.run( + verbose=True, + query=text, + raw_answer=raw_answer, + vector_only_answer=vector_only_answer, + graph_only_answer=graph_only_answer, + graph_vector_answer=graph_vector_answer, + ) if context.get("switch_to_bleu"): gr.Warning("Online reranker fails, automatically switches to local bleu rerank.") return ( - context.get("raw_answer", ""), - 
context.get("vector_only_answer", ""), - context.get("graph_only_answer", ""), - context.get("graph_vector_answer", ""), + context.get("raw_answer_result", ""), + context.get("vector_only_answer_result", ""), + context.get("graph_only_answer_result", ""), + context.get("graph_vector_answer_result", ""), + { + "vector_contexts": context.get("vector_contexts"), + "graph_contexts": context.get("graph_contexts"), + "graph_vector_contexts": context.get("graph_vector_contexts"), + }, ) except ValueError as e: log.critical(e) @@ -125,9 +141,7 @@ def toggle_slider(enable): ) graph_ratio = gr.Slider(0, 1, 0.5, label="Graph Ratio", step=0.1, interactive=False) - graph_vector_radio.change( - toggle_slider, inputs=graph_vector_radio, outputs=graph_ratio - ) # pylint: disable=no-member + graph_vector_radio.change(toggle_slider, inputs=graph_vector_radio, outputs=graph_ratio) # pylint: disable=no-member near_neighbor_first = gr.Checkbox( value=False, label="Near neighbor first(Optional)", @@ -136,6 +150,10 @@ def toggle_slider(enable): custom_related_information = gr.Text( prompt.custom_rerank_info, label="Custom related information(Optional)", + info=( + "Used for rerank, can increase the weight of knowledge related to it, such as `law`. " + "Multiple values can be separated by commas." + ), ) btn = gr.Button("Answer Question", variant="primary") @@ -160,35 +178,48 @@ def toggle_slider(enable): > 1. Download the template file & fill in the questions you want to test. > 2. Upload the file & click the button to generate answers. (Preview shows the first 40 lines) > 3. The answer options are the same as the above RAG/Q&A frame - """) + """ + ) + + # TODO: Replace string with python constant tests_df_headers = [ "Question", - "Expected Answer", - "Basic LLM Answer", - "Vector-only Answer", "Graph-only Answer", "Graph-Vector Answer", + "Vector-only Answer", + "Basic LLM Answer", + "Expected Answer", ] + rag_answer_header_dict = { + "Vector-only Answer": "Vector Contexts", + "Graph-only Answer": "Graph Contexts", + "Graph-Vector Answer": "Graph-Vector Contexts", + } + answers_path = os.path.join(resource_path, "demo", "questions_answers.xlsx") questions_path = os.path.join(resource_path, "demo", "questions.xlsx") questions_template_path = os.path.join(resource_path, "demo", "questions_template.xlsx") + ragas_metrics_list = list(RAGAS_METRICS_DICT.keys()) + def read_file_to_excel(file: NamedString, line_count: Optional[int] = None): - df = None + if os.path.exists(answers_path): + os.remove(answers_path) + df = pd.DataFrame() if not file: return pd.DataFrame(), 1 if file.name.endswith(".xlsx"): df = pd.read_excel(file.name, nrows=line_count) if file else pd.DataFrame() elif file.name.endswith(".csv"): df = pd.read_csv(file.name, nrows=line_count) if file else pd.DataFrame() - df.to_excel(questions_path, index=False) - if df.empty: - df = pd.DataFrame([[""] * len(tests_df_headers)], columns=tests_df_headers) else: - df.columns = tests_df_headers + raise gr.Error("Only support .xlsx and .csv files.") + df.to_excel(questions_path, index=False) # truncate the dataframe if it's too long if len(df) > 40: return df.head(40), 40 + if len(df) == 0: + gr.Warning("No data in the file.") return df, len(df) def change_showing_excel(line_count): @@ -217,7 +248,7 @@ def several_rag_answer( total_rows = len(df) for index, row in df.iterrows(): question = row.iloc[0] - basic_llm_answer, vector_only_answer, graph_only_answer, graph_vector_answer = rag_answer( + llm_answer, vector_only_answer, graph_only_answer, 
graph_vector_answer, contexts = rag_answer( question, is_raw_answer, is_vector_only_answer, @@ -229,18 +260,30 @@ def several_rag_answer( custom_related_information, answer_prompt, ) - df.at[index, "Basic LLM Answer"] = basic_llm_answer - df.at[index, "Vector-only Answer"] = vector_only_answer - df.at[index, "Graph-only Answer"] = graph_only_answer - df.at[index, "Graph-Vector Answer"] = graph_vector_answer + df.at[index, "Basic LLM Answer"] = llm_answer if llm_answer else None + df.at[index, "Vector-only Answer"] = vector_only_answer if vector_only_answer else None + df.at[index, "Graph-only Answer"] = graph_only_answer if graph_only_answer else None + df.at[index, "Graph-Vector Answer"] = graph_vector_answer if graph_vector_answer else None + if "Vector Contexts" not in df.columns: + df["Vector Contexts"] = None + df["Graph Contexts"] = None + df["Graph-Vector Contexts"] = None + df.at[index, "Vector Contexts"] = contexts.get("vector_contexts") + df.at[index, "Graph Contexts"] = contexts.get("graph_contexts") + df.at[index, "Graph-Vector Contexts"] = contexts.get("graph_vector_contexts") progress((index + 1, total_rows)) - answers_path = os.path.join(resource_path, "demo", "questions_answers.xlsx") + + df = df.dropna(axis=1, how="all") + df_to_show = df[[col for col in tests_df_headers if col in df.columns]] + for rag_context_header in rag_answer_header_dict.values(): + if rag_context_header in df.columns: + df[rag_context_header] = df[rag_context_header].apply(lambda x: json.dumps(x, ensure_ascii=False)) df.to_excel(answers_path, index=False) - return df.head(answer_max_line_count), answers_path + return df_to_show.head(answer_max_line_count), answers_path with gr.Row(): with gr.Column(): - questions_file = gr.File(file_types=[".xlsx", ".csv"], label="Questions File (.xlsx & csv)") + questions_file = gr.File(file_types=[".xlsx", ".csv"], label="Questions File (.xlsx & .csv)") with gr.Column(): test_template_file = os.path.join(resource_path, "demo", "questions_template.xlsx") gr.File(value=test_template_file, label="Download Template File") @@ -266,3 +309,45 @@ def several_rag_answer( ) questions_file.change(read_file_to_excel, questions_file, [qa_dataframe, answer_max_line_count]) answer_max_line_count.change(change_showing_excel, answer_max_line_count, qa_dataframe) + + def evaluate_rag(metrics: List[str], num: int): + answers_df = pd.read_excel(answers_path) + answers_df = answers_df.head(num) + if not any(answers_df.columns.isin(rag_answer_header_dict)): + raise gr.Error("No RAG answers found in the answer file.") + rag_answers = [answer for answer in rag_answer_header_dict if answer in answers_df.columns] + df = pd.DataFrame() + + for answer in rag_answers: + context_header = rag_answer_header_dict[answer] + answers_df[context_header] = answers_df[context_header].apply(json.loads) + rag_data = { + "question": answers_df["Question"].to_list(), + "answer": answers_df[answer].to_list(), + "contexts": answers_df[rag_answer_header_dict[answer]].to_list(), + "ground_truth": answers_df["Expected Answer"].to_list(), + } + dataset = Dataset.from_dict(rag_data) + score = evaluate(dataset, metrics=[RAGAS_METRICS_DICT[metric] for metric in metrics]) + print(score.scores.to_pandas()) + df = pd.concat([df, score.scores.to_pandas()]) + df.insert(0, 'method', rag_answers) + return df + + with gr.Row(): + with gr.Column(): + ragas_metrics = gr.Dropdown( + choices=ragas_metrics_list, + value=ragas_metrics_list[:4], + multiselect=True, + label="Metrics", + info="Several evaluation metrics from 
`ragas`, please refer to https://docs.ragas.io/en/stable/concepts/metrics/index.html", + ) + with gr.Column(): + dataset_nums = gr.Number(1, label="Dataset Numbers", minimum=1, maximum=1) + ragas_btn = gr.Button("Evaluate RAG", variant="primary") + ragas_btn.click( + evaluate_rag, + inputs=[ragas_metrics, dataset_nums], + outputs=[gr.DataFrame(label="RAG Evaluation Results", headers=ragas_metrics_list)], + ) diff --git a/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py b/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py index c4ff757bd..ce447e16b 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py @@ -66,21 +66,21 @@ def run(self, context: Dict[str, Any]) -> Dict[str, Any]: if self.custom_related_information: query = query + self.custom_related_information context["graph_ratio"] = self.graph_ratio - vector_search = context.get("vector_search", False) - graph_search = context.get("graph_search", False) - if graph_search and vector_search: - graph_length = int(self.topk * self.graph_ratio) - vector_length = self.topk - graph_length - else: - graph_length = self.topk - vector_length = self.topk - + + raw_answer = context.get("raw_answer", False) + vector_only_answer = context.get("vector_only_answer", False) + graph_only_answer = context.get("graph_only_answer", False) + graph_vector_answer = context.get("graph_vector_answer", False) + + if raw_answer and not (vector_only_answer or graph_only_answer or graph_vector_answer): + return context + vector_result = context.get("vector_result", []) - vector_length = min(len(vector_result), vector_length) + vector_length = min(len(vector_result), self.topk) vector_result = self._dedup_and_rerank(query, vector_result, vector_length) graph_result = context.get("graph_result", []) - graph_length = min(len(graph_result), graph_length) + graph_length = min(len(graph_result), self.topk) if self.near_neighbor_first: graph_result = self._rerank_with_vertex_degree( query, @@ -94,12 +94,17 @@ def run(self, context: Dict[str, Any]) -> Dict[str, Any]: else: graph_result = self._dedup_and_rerank(query, graph_result, graph_length) + context["graph_rerank_length"] = min(graph_length, int(self.topk * self.graph_ratio)) + context["vector_rerank_length"] = min(vector_length, self.topk - int(self.topk * self.graph_ratio)) + context["vector_result"] = vector_result context["graph_result"] = graph_result return context def _dedup_and_rerank(self, query: str, results: List[str], topn: int) -> List[str]: + if topn == 0: + return [] results = list(set(results)) if self.method == "bleu": return _bleu_rerank(query, results)[:topn] @@ -116,6 +121,9 @@ def _rerank_with_vertex_degree( vertex_degree_list: Optional[List[List[str]]], knowledge_with_degree: Dict[str, List[str]], ) -> List[str]: + if topn == 0: + return [] + if vertex_degree_list is None or len(vertex_degree_list) == 0: return self._dedup_and_rerank(query, results, topn) diff --git a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py index dd75b18e0..0e12127c5 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py @@ -169,28 +169,16 @@ def merge_dedup_rerank( def synthesize_answer( self, - raw_answer: bool = False, - vector_only_answer: bool = True, - graph_only_answer: bool = False, - 
graph_vector_answer: bool = False, answer_prompt: Optional[str] = None, ): """ Add an answer synthesis operator to the pipeline. - :param raw_answer: Whether to return raw answers. - :param vector_only_answer: Whether to return vector-only answers. - :param graph_only_answer: Whether to return graph-only answers. - :param graph_vector_answer: Whether to return graph-vector combined answers. :param answer_prompt: Template for the answer synthesis prompt. :return: Self-instance for chaining. """ self._operators.append( AnswerSynthesize( - raw_answer=raw_answer, - vector_only_answer=vector_only_answer, - graph_only_answer=graph_only_answer, - graph_vector_answer=graph_vector_answer, prompt_template=answer_prompt, ) ) diff --git a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py index 5272f9066..6b77efbff 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py @@ -18,7 +18,7 @@ # pylint: disable=W0621 import asyncio -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional from hugegraph_llm.config import prompt from hugegraph_llm.models.llms.base import BaseLLM @@ -28,19 +28,33 @@ DEFAULT_ANSWER_TEMPLATE = prompt.answer_prompt +def _get_vector_result_str(vector_result: List[str]) -> str: + if vector_result: + return "Phrases related to the query:\n" + "\n".join( + f"{i + 1}. {res}" for i, res in enumerate(vector_result) + ) + no_vector_data_msg = "No (vector)phrase related to the query." + log.warning(no_vector_data_msg) + return no_vector_data_msg + +def _get_graph_result_str(graph_context_head: str, graph_result: List[str]) -> str: + if graph_result: + return graph_context_head + "\n".join( + f"{i + 1}. {res}" for i, res in enumerate(graph_result) + ) + no_graph_data_msg = "No related graph data found for current query." + log.warning(no_graph_data_msg) + return no_graph_data_msg + class AnswerSynthesize: def __init__( - self, - llm: Optional[BaseLLM] = None, - prompt_template: Optional[str] = None, - question: Optional[str] = None, - context_body: Optional[str] = None, - context_head: Optional[str] = None, - context_tail: Optional[str] = None, - raw_answer: bool = False, - vector_only_answer: bool = True, - graph_only_answer: bool = False, - graph_vector_answer: bool = False, + self, + llm: Optional[BaseLLM] = None, + prompt_template: Optional[str] = None, + question: Optional[str] = None, + context_body: Optional[str] = None, + context_head: Optional[str] = None, + context_tail: Optional[str] = None, ): self._llm = llm self._prompt_template = prompt_template or DEFAULT_ANSWER_TEMPLATE @@ -48,10 +62,6 @@ def __init__( self._context_body = context_body self._context_head = context_head self._context_tail = context_tail - self._raw_answer = raw_answer - self._vector_only_answer = vector_only_answer - self._graph_only_answer = graph_only_answer - self._graph_vector_answer = graph_vector_answer def run(self, context: Dict[str, Any]) -> Dict[str, Any]: if self._llm is None: @@ -63,98 +73,148 @@ def run(self, context: Dict[str, Any]) -> Dict[str, Any]: self._question = context.get("query") or None assert self._question is not None, "No question for synthesizing." 
- context_head_str = context.get("synthesize_context_head") or self._context_head or "" - context_tail_str = context.get("synthesize_context_tail") or self._context_tail or "" + context_head_str = ( + context.get("synthesize_context_head") or self._context_head or "" + ) + context_tail_str = ( + context.get("synthesize_context_tail") or self._context_tail or "" + ) if self._context_body is not None: - context_str = (f"{context_head_str}\n" - f"{self._context_body}\n" - f"{context_tail_str}".strip("\n")) + context_str = ( + f"{context_head_str}\n" + f"{self._context_body}\n" + f"{context_tail_str}".strip("\n") + ) - final_prompt = self._prompt_template.format(context_str=context_str, query_str=self._question) + final_prompt = self._prompt_template.format( + context_str=context_str, query_str=self._question + ) response = self._llm.generate(prompt=final_prompt) return {"answer": response} - vector_result = context.get("vector_result") - if vector_result: - vector_result_context = "Phrases related to the query:\n" + "\n".join( - f"{i + 1}. {res}" for i, res in enumerate(vector_result) - ) - else: - vector_result_context = "No (vector)phrase related to the query." - - graph_result = context.get("graph_result") - if graph_result: - graph_context_head = context.get("graph_context_head", "Knowledge from graphdb for the query:\n") - graph_result_context = graph_context_head + "\n".join( - f"{i + 1}. {res}" for i, res in enumerate(graph_result) + context = asyncio.run( + self.async_generate( + context, + context_head_str, + context_tail_str ) - else: - graph_result_context = "No related graph data found for current query." - log.warning(graph_result_context) - - context = asyncio.run(self.async_generate(context, context_head_str, context_tail_str, - vector_result_context, graph_result_context)) + ) return context - async def async_generate(self, context: Dict[str, Any], context_head_str: str, - context_tail_str: str, vector_result_context: str, - graph_result_context: str): + async def async_generate( + self, + context: Dict[str, Any], + context_head_str: str, + context_tail_str: str + ): # pylint: disable=R0912 (too-many-branches) verbose = context.get("verbose") or False # TODO: replace task_cache with a better name task_cache = {} - if self._raw_answer: + + raw_answer = context.get("raw_answer", False) + vector_only_answer = context.get("vector_only_answer", False) + graph_only_answer = context.get("graph_only_answer", False) + graph_vector_answer = context.get("graph_vector_answer", False) + + if raw_answer: final_prompt = self._question - task_cache["raw_task"] = asyncio.create_task(self._llm.agenerate(prompt=final_prompt)) - if self._vector_only_answer: - context_str = (f"{context_head_str}\n" - f"{vector_result_context}\n" - f"{context_tail_str}".strip("\n")) - - final_prompt = self._prompt_template.format(context_str=context_str, query_str=self._question) - task_cache["vector_only_task"] = asyncio.create_task(self._llm.agenerate(prompt=final_prompt)) - if self._graph_only_answer: - context_str = (f"{context_head_str}\n" - f"{graph_result_context}\n" - f"{context_tail_str}".strip("\n")) - - final_prompt = self._prompt_template.format(context_str=context_str, query_str=self._question) - task_cache["graph_only_task"] = asyncio.create_task(self._llm.agenerate(prompt=final_prompt)) - if self._graph_vector_answer: + task_cache["raw_task"] = asyncio.create_task( + self._llm.agenerate(prompt=final_prompt) + ) + if vector_only_answer: + vector_result = context.get("vector_result") + 
vector_result_context = _get_vector_result_str(vector_result) + + context_str = ( + f"{context_head_str}\n" + f"{vector_result_context}\n" + f"{context_tail_str}".strip("\n") + ) + context["vector_contexts"] = vector_result + + final_prompt = self._prompt_template.format( + context_str=context_str, query_str=self._question + ) + task_cache["vector_only_task"] = asyncio.create_task( + self._llm.agenerate(prompt=final_prompt) + ) + if graph_only_answer: + + graph_result = context.get("graph_result") + graph_context_head = context.get( + "graph_context_head", "Knowledge from graphdb for the query:\n" + ) + graph_result_context = _get_graph_result_str(graph_context_head, graph_result) + + context_str = ( + f"{context_head_str}\n" + f"{graph_result_context}\n" + f"{context_tail_str}".strip("\n") + ) + context["graph_contexts"] = graph_result + + final_prompt = self._prompt_template.format( + context_str=context_str, query_str=self._question + ) + task_cache["graph_only_task"] = asyncio.create_task( + self._llm.agenerate(prompt=final_prompt) + ) + if graph_vector_answer: + vector_result = context.get("vector_result") + vector_rerank_length = context.get("vector_rerank_length") + vector_result_context = _get_vector_result_str(vector_result[:vector_rerank_length]) + + graph_result = context.get("graph_result") + graph_rerank_length = context.get("graph_rerank_length") + graph_context_head = context.get( + "graph_context_head", "Knowledge from graphdb for the query:\n" + ) + graph_result_context = _get_graph_result_str(graph_context_head, graph_result[:graph_rerank_length]) context_body_str = f"{vector_result_context}\n{graph_result_context}" + + context["graph_vector_contexts"] = vector_result[:vector_rerank_length] + graph_result[:graph_rerank_length] + if context.get("graph_ratio", 0.5) < 0.5: context_body_str = f"{graph_result_context}\n{vector_result_context}" - context_str = (f"{context_head_str}\n" - f"{context_body_str}\n" - f"{context_tail_str}".strip("\n")) + context_str = ( + f"{context_head_str}\n" + f"{context_body_str}\n" + f"{context_tail_str}".strip("\n") + ) - final_prompt = self._prompt_template.format(context_str=context_str, query_str=self._question) + final_prompt = self._prompt_template.format( + context_str=context_str, query_str=self._question + ) + task_cache["graph_vector_task"] = asyncio.create_task( self._llm.agenerate(prompt=final_prompt) ) # TODO: use log.debug instead of print if task_cache.get("raw_task"): response = await task_cache["raw_task"] - context["raw_answer"] = response + context["raw_answer_result"] = response if verbose: print(f"\033[91mANSWER: {response}\033[0m") if task_cache.get("vector_only_task"): response = await task_cache["vector_only_task"] - context["vector_only_answer"] = response + context["vector_only_answer_result"] = response if verbose: print(f"\033[91mANSWER: {response}\033[0m") if task_cache.get("graph_only_task"): response = await task_cache["graph_only_task"] - context["graph_only_answer"] = response + context["graph_only_answer_result"] = response if verbose: print(f"\033[91mANSWER: {response}\033[0m") if task_cache.get("graph_vector_task"): response = await task_cache["graph_vector_task"] - context["graph_vector_answer"] = response + context["graph_vector_answer_result"] = response if verbose: print(f"\033[91mANSWER: {response}\033[0m") - ops = sum([self._raw_answer, self._vector_only_answer, self._graph_only_answer, self._graph_vector_answer]) - context['call_count'] = context.get('call_count', 0) + ops + ops = sum( + [raw_answer, 
vector_only_answer, graph_only_answer, graph_vector_answer] + ) + context["call_count"] = context.get("call_count", 0) + ops return context diff --git a/hugegraph-llm/src/hugegraph_llm/resources/demo/questions_template.xlsx b/hugegraph-llm/src/hugegraph_llm/resources/demo/questions_template.xlsx index deb70c121c82c3376296d5855be7b877ed0ea6c3..9b630f7de42595ed8cbbef05797905cf63bf020a 100644 GIT binary patch literal 6387 zcmaJ`2RzjO|394*B72>2_K55dlE^wEnT50W-kT&lduE4{q9S`|b4b?NBYP9F>3>ow zegD7r<8gQHaj)m=^L{{Cq=0e(Km8Svqhi{**!gA!A-! z)jWs%4SERxxbj8SZgJih0+Gv_d=7T+qs-b+pzn%td|bJy)I~MiP;%_1sWSu zRMCy0ga)dnx=G426NWM)JK^h%)p8Wua`c;d6vBt9Y#1p;9Ah1In$njaf%ameVj3SQ zS7O?KgE6sbFsTxS*||R zv>D+eJYn@4*W71p!CoQ`=ID&i#>OudhnNM&KTysT?U6ZFD~bT3VJ!z#V-1c_1z3SJ zbUDV}w;}YFeNOx-VV3v#;uu!B9Zju1;^Wjh%-^J)%^iUFVNv4IutZd-9ou(`(c70x z(bZavkzsQ6zTmJF1vaM0bOLB@tJP@#@%(Fcw5I zBIapII~75Y-s{QXV)MloF{x)x*A=I}PUE>fXFD8L^=NC1jdFSB4XwJV^J!9^oF#pu z1;Zf3qBup(!-AzYs!gW_n-6XEjt2_|NOu5qZy7C-JWQLl#r*jSg1WM{+Xf-)D(~%p zT{}xZfnW1*QqyAZOMU{XYD+0*RbhZj$$QJ)_RZ8w6WR?neI*BP!|+8wpis%)8;;Ss z(#iqaVvH%Pq+cj~T!@=Vm^C$FWRG6FL1+Z6J8+?jCTZoF`t=Dj60BT$7uZnnT;RSB zKL)~>JDPPsI+#6d@1P0iy^gRr@^#B?`)H=Sizxzm|N-c{rEZKnNF?Gw6*pzo_K_Z}V3KE+oAKjx@)KRDca#Zh~*60x2hI|)aJbVo`% zKBdS4Qu)4>4Zr@lWqx{)#z1fGBMSAEN#>7z=v6mbH1`LgWRy1`4k$31ns8k^s%7w0pJQ3DU zzk4@BTNE`NKeGU(X@$}Wl;D~9`Bq2?SHHM@gpxz#m266|nD(4I+TvhU%FbG*)hAPy zfw_g1I%rECV5cpL%oW&0c-1M&FA?ZQkSofMAJ{=*`mY>5$v%K5@_t)7DEeVI;6nOaq>@(z1=B?6UCgIEx)Fg3s+^{r`dFu;S}WsvJYGk3f6 z1r4#%w*oBgcaG*?5e$NX``E9FrMr`PQy;a37{BtiQya`pWQf7AQw6;wopbQa4idns zGMKrKLNo~1sgAosj!qS~!E4IfJH(Y748$5Q2?0Ows*LI7dJ0V0%?gh{#7Z0Iu#%Qa za&z`yUH4)Abhr%tV6Ue>vfV>KEW8SmYy+=qXyy2b6NCVlu7Xx@lwi8N_%qDK2@E!G zVp!GMZuBO#wYsN3a)3)F*5cpLF<}ZI-^gPky>C@pi4yFnvtGt#??MZgpHblPK|=~t z_iQqByV*8HUzGK~7|LD(A2ztSKIwfkIGIv&gPG7wb`c9+B=u&8zHHN(F=hSE2&d4f z?Wd#XVP8yrqah2+DrI?tP2qbe^pNYjVd!e3FBPg&0I>MQt_xJwp!JV4o94%Oidp?uf%zkPNvN55wQ&jPpNL2z@%!V>q$F+uw|*?rO62)G_0&qx1p+g=?(`aN$ZZuo*6jsq@)=#1`M#?OYY>PsyyPw z>oQ_9lA8!{UK?m@)8al!5TdeZI@7;4CFMN$+=7RWQ#};a6F#cAld=|FOz~DC%-npqvN?##22LEog z1Zy9AldRoaw+l@Ipl)EqxP01DYOWD*Jzy|ZOuPlucM#c4W8?;ZXXf7+eoTW0G?LWG z+Sgy=5v#w4aoJ-_{^)C7@%X{y+YSB3uisp9?g)GoIH2>^ti&w5!wQP^RIju-*Z|FB z+dqiXJ*@faW~b##B;XxO*%+*jUs3W!CU(0Ny`|-L!$?I|`b1Fl6E>~5?jc>i;4C{~ z+3YXD1F|c5y-NdT5DSFv7__3WJ=fe*E$n+HI1CbJ#MG6s%HSe=FHGbpOI+Xgqs)dN zF0|f)?-`?OT0K+sL3ji4+5@ez`G(llTdOGrWnDq<$gr*-bxDo3OHnY}J=6I(&bFNq z>V97m24CQUmysBaz6^zA(N{CeBt#2KCT?fSRm8R##F}xyi)0g>Sytx8OFe-)9;gSl{PLS|j2jDTjMZ1*u9vM6b&La%-6UvVd4J$dLRkNMW! zhbo(7K{g`B!qIa5!)HIV*HLg_$^3kBR~lJBTayOn_D8wSVyNwR`ud>Pa`Reo7$>d? zJD<4TTvxqGq)~us^B{~PJ7(A#wGzf{j0eaOtdq6K<)U#aF^onqRp?{#`twC=2qZ=K z#p&_BPmLj@56|SGWlX_Azux99G@f2Bs`5YO`#8(-GgB6Wi=w8(Bn=e)NqPcrk=EH z*@Zq*d#zj-Go=f%*#X^BM2W81>L~h*_V9U23Z67!9NPmer8<%TgBe=IwkF0_1CzP; zeWcKZ61Imz zzxX5_k7fI^GUtfr(+Z(o?cm@jix03q__`5Us^ilwmP{d-DQm%+q=3rGrd79Lzo?eW zIu(l$vr}NY`Rb*bN||g4)%)pS&xSko4?n5ne~)@B-0m>PBbs7IFbje-(>gDCRTaN- zbR^BU8iG(VKD+&Bz!(^j&P8KQIrU0E8D&6x`I0|c)hF0Gjry%s;vR%0P)xJ4Jq|afPUD)HubnGYqdwR zVJ|0QFdH(4x(+Abl2iRbj+&Qpl7Sj(XP5Xo}ld zI~rL#>ZrKc8acpD-J)2fU7>*!_b9GaJ+^Gt2iJTU7(u6!>FHCzAEsJ6GZw$IK18n~ zrWz?)L{e+)VvN{yFB$YuU!l$l%OLJGr^Q4U1d}6*T{UmTm1c@6)iIL*`=$WQ#9Kp4 zk$@|oupc~)XC9LYt^mEy55c%ZC*PrsXS1cj_>$iod()k}JW3GFSCqktLSMwgN79Ot zXt%*l3U(zfx=;geB)U>pLAsrz{@RmegZyHQ_X47X5gx{5a6Ll*Bma6c{nGg=lA33h z*hg&|mf+mFyVc)Pl=R@wu}T0lu>P(%bv`WPgloNpn;qA76}%g(+#>9hVRE&~3j?>? 
z!Sif++S?=z%i`bbY~CA{kr;FY-4;*~3b03>ELjOVLR5yIlB{zgkfB5A^Wvd8d9(Jw zkdQxtJnp~CYTO!^Xvv^Es31>8{ZC)iv9|u7D}sFRuPrsE!*cOw(+t!-a=>wTswH)R zppslJt?OI`K*CkofE$}e%J(~df4adJBIrXT$Av}vcH*3x@O8@8_H>N)+P=fpb5_>c?C8q#H?S(QR!IqLHDYLp0EL1k8dZ2 zO>tmVLPAkX#^FsQ@1}QmW6K#ITBfz3NJ4#g6EaV7I?{%qND6en#`|;O4NA^K`U>ED zXDhTcK`_vb$?jG~!1ox6xsx{IF_fZ^qa0lk|0@(VjS@D^5lDbI(yl% z>X0BY_29H5>fO#sy+*<$WOsyyfPoA2qy3CMQE!?>)&|B)1aX51aqL}P0%q-OH#95r zo}u+L{ehCYo?M5-gXx&Qv}-_%#;?GF7`wYl2~Co_-E;B5w;^Bb?PAi9*%rGj|5Xci z)8Yyu$tUSi%DceS5$+Zi0FmTB8tQ&G{JpK{m+27nzYEs?YBKuW`0p(dzf1vu4F?UdFH=i`KTUsE zLVid1T@Uz$kV^Rb82qC;@H^n|>GdzbbfP~1e@m|aG1>kd^zUK7FGv7D>NNoHUqQj| m=6|2L|I>UHqyYTe{I7XjRtgH_g8%>`Xr~0ZaRJTGtN#b7H;xDZ diff --git a/hugegraph-llm/src/hugegraph_llm/utils/ragas_utils.py b/hugegraph-llm/src/hugegraph_llm/utils/ragas_utils.py new file mode 100644 index 000000000..2abb38e8f --- /dev/null +++ b/hugegraph-llm/src/hugegraph_llm/utils/ragas_utils.py @@ -0,0 +1,19 @@ +from ragas.metrics import ( + faithfulness, + answer_correctness, + context_precision, + answer_relevancy, + context_recall, + context_utilization, + context_entity_recall, +) + +RAGAS_METRICS_DICT = { + "context_precision": context_precision, + "faithfulness": faithfulness, + "answer_relevancy": answer_relevancy, + "answer_correctness": answer_correctness, + "context_recall": context_recall, + "context_utilization": context_utilization, + "context_entity_recall": context_entity_recall, +} diff --git a/hugegraph-python-client/requirements.txt b/hugegraph-python-client/requirements.txt index d7a8148a0..c4df41c62 100644 --- a/hugegraph-python-client/requirements.txt +++ b/hugegraph-python-client/requirements.txt @@ -1,4 +1,4 @@ decorator==5.1.1 -requests==2.32.0 +requests~=2.32.0 setuptools==70.0.0 urllib3==2.2.2 From 0e594475f329999ae7cfe3846b81274ecf75fa41 Mon Sep 17 00:00:00 2001 From: jasinliu <939282975@qq.com> Date: Thu, 10 Oct 2024 14:11:03 +0800 Subject: [PATCH 2/3] add license --- .../src/hugegraph_llm/utils/ragas_utils.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/hugegraph-llm/src/hugegraph_llm/utils/ragas_utils.py b/hugegraph-llm/src/hugegraph_llm/utils/ragas_utils.py index 2abb38e8f..d45411c2c 100644 --- a/hugegraph-llm/src/hugegraph_llm/utils/ragas_utils.py +++ b/hugegraph-llm/src/hugegraph_llm/utils/ragas_utils.py @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ from ragas.metrics import ( faithfulness, answer_correctness, From 6e82497bb8a00a6378413b9354d1c02550ad4a94 Mon Sep 17 00:00:00 2001 From: jasinliu <939282975@qq.com> Date: Wed, 6 Nov 2024 01:28:20 +0800 Subject: [PATCH 3/3] update demo --- hugegraph-llm/requirements.txt | 2 +- .../hugegraph_llm/demo/rag_demo/rag_block.py | 90 ++++++++++++------- .../src/hugegraph_llm/utils/ragas_utils.py | 43 ++++++--- 3 files changed, 86 insertions(+), 49 deletions(-) diff --git a/hugegraph-llm/requirements.txt b/hugegraph-llm/requirements.txt index 8f4f1bb52..8631b85db 100644 --- a/hugegraph-llm/requirements.txt +++ b/hugegraph-llm/requirements.txt @@ -14,4 +14,4 @@ python-dotenv>=1.0.1 pyarrow~=17.0.0 # TODO: a temporary dependency for pandas, figure out why ImportError pandas~=2.2.2 openpyxl~=3.1.5 -ragas~=0.1.20 +git+https://github.com/jasinliu/ragas.git@patch-2 # TODO: wait for release diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py index 21092b1e8..63c425714 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py @@ -19,31 +19,33 @@ import json import os -from typing import Tuple, List, Literal, Optional +from typing import List, Literal, Optional, Tuple -from datasets import Dataset import gradio as gr -from gradio.utils import NamedString import pandas as pd +from datasets import Dataset +from gradio.utils import NamedString +from langchain_openai.chat_models import ChatOpenAI from ragas import evaluate +from ragas.llms import LangchainLLMWrapper -from hugegraph_llm.config import resource_path, prompt +from hugegraph_llm.config import prompt, resource_path, settings from hugegraph_llm.operators.graph_rag_task import RAGPipeline from hugegraph_llm.utils.log import log -from hugegraph_llm.utils.ragas_utils import RAGAS_METRICS_DICT +from hugegraph_llm.utils.ragas_utils import RAGAS_METRICS_DICT, RAGAS_METRICS_ZH_DICT def rag_answer( - text: str, - raw_answer: bool, - vector_only_answer: bool, - graph_only_answer: bool, - graph_vector_answer: bool, - graph_ratio: float, - rerank_method: Literal["bleu", "reranker"], - near_neighbor_first: bool, - custom_related_information: str, - answer_prompt: str, + text: str, + raw_answer: bool, + vector_only_answer: bool, + graph_only_answer: bool, + graph_vector_answer: bool, + graph_ratio: float, + rerank_method: Literal["bleu", "reranker"], + near_neighbor_first: bool, + custom_related_information: str, + answer_prompt: str, ) -> Tuple: """ Generate an answer using the RAG (Retrieval-Augmented Generation) pipeline. @@ -177,8 +179,7 @@ def toggle_slider(enable): > 1. Download the template file & fill in the questions you want to test. > 2. Upload the file & click the button to generate answers. (Preview shows the first 40 lines) > 3. 
The answer options are the same as the above RAG/Q&A frame - """ - ) + """) # TODO: Replace string with python constant tests_df_headers = [ @@ -309,29 +310,45 @@ def several_rag_answer( questions_file.change(read_file_to_excel, questions_file, [qa_dataframe, answer_max_line_count]) answer_max_line_count.change(change_showing_excel, answer_max_line_count, qa_dataframe) - def evaluate_rag(metrics: List[str], num: int): + def evaluate_rag(metrics: List[str], num: int, language: Literal["english", "chinese"]): answers_df = pd.read_excel(answers_path) answers_df = answers_df.head(num) if not any(answers_df.columns.isin(rag_answer_header_dict)): raise gr.Error("No RAG answers found in the answer file.") - rag_answers = [answer for answer in rag_answer_header_dict if answer in answers_df.columns] - df = pd.DataFrame() + if language == "chinese": + eval_metrics = [RAGAS_METRICS_ZH_DICT[metric] for metric in metrics] + else: + eval_metrics = [RAGAS_METRICS_DICT[metric] for metric in metrics] + rag_method_names = [answer for answer in rag_answer_header_dict if answer in answers_df.columns] + score_df = pd.DataFrame() - for answer in rag_answers: + for answer in rag_method_names: context_header = rag_answer_header_dict[answer] answers_df[context_header] = answers_df[context_header].apply(json.loads) rag_data = { - "question": answers_df["Question"].to_list(), - "answer": answers_df[answer].to_list(), - "contexts": answers_df[rag_answer_header_dict[answer]].to_list(), - "ground_truth": answers_df["Expected Answer"].to_list(), + "user_input": answers_df["Question"].to_list(), + "response": answers_df[answer].to_list(), + "retrieved_contexts": answers_df[rag_answer_header_dict[answer]].to_list(), + "reference": answers_df["Expected Answer"].to_list(), } + eval_llm = LangchainLLMWrapper( + ChatOpenAI( + model="gpt-4o-mini", + temperature=0, + base_url=settings.openai_api_base, + api_key=settings.openai_api_key, + ) + ) + dataset = Dataset.from_dict(rag_data) - score = evaluate(dataset, metrics=[RAGAS_METRICS_DICT[metric] for metric in metrics]) - print(score.scores.to_pandas()) - df = pd.concat([df, score.scores.to_pandas()]) - df.insert(0, 'method', rag_answers) - return df + score = evaluate( + dataset, + metrics=eval_metrics, + llm=eval_llm, + ) + score_df = pd.concat([score_df, score.to_pandas()]) + score_df.insert(0, "method", rag_method_names) + return score_df with gr.Row(): with gr.Column(): @@ -340,14 +357,19 @@ def evaluate_rag(metrics: List[str], num: int): value=ragas_metrics_list[:4], multiselect=True, label="Metrics", - info="Several evaluation metrics from `ragas`, please refer to https://docs.ragas.io/en/stable/concepts/metrics/index.html", + info=( + "Several evaluation metrics from `ragas`, ", + "please refer to https://docs.ragas.io/en/stable/concepts/metrics/index.html", + ), ) with gr.Column(): - dataset_nums = gr.Number(1, label="Dataset Numbers", minimum=1, maximum=1) + with gr.Row(): + dataset_nums = gr.Number(1, label="Dataset Numbers", minimum=1, maximum=1) + language = gr.Radio(["english", "chinese"], label="Language", value="chinese") ragas_btn = gr.Button("Evaluate RAG", variant="primary") ragas_btn.click( evaluate_rag, - inputs=[ragas_metrics, dataset_nums], + inputs=[ragas_metrics, dataset_nums, language], outputs=[gr.DataFrame(label="RAG Evaluation Results", headers=ragas_metrics_list)], ) - return inp, answer_prompt_input \ No newline at end of file + return inp, answer_prompt_input diff --git a/hugegraph-llm/src/hugegraph_llm/utils/ragas_utils.py 
b/hugegraph-llm/src/hugegraph_llm/utils/ragas_utils.py index d45411c2c..f0535b386 100644 --- a/hugegraph-llm/src/hugegraph_llm/utils/ragas_utils.py +++ b/hugegraph-llm/src/hugegraph_llm/utils/ragas_utils.py @@ -15,22 +15,37 @@ # specific language governing permissions and limitations # under the License. +from pysbd import Segmenter from ragas.metrics import ( - faithfulness, - answer_correctness, - context_precision, - answer_relevancy, - context_recall, - context_utilization, - context_entity_recall, + ContextEntityRecall, + FactualCorrectness, + Faithfulness, + LLMContextPrecisionWithoutReference, + LLMContextPrecisionWithReference, + LLMContextRecall, + NoiseSensitivity, + ResponseRelevancy, ) RAGAS_METRICS_DICT = { - "context_precision": context_precision, - "faithfulness": faithfulness, - "answer_relevancy": answer_relevancy, - "answer_correctness": answer_correctness, - "context_recall": context_recall, - "context_utilization": context_utilization, - "context_entity_recall": context_entity_recall, + "context_entity_recall": ContextEntityRecall(), + "factual_correctness": FactualCorrectness(), + "faithfulness": Faithfulness(), + "llm_context_precision_without_reference": LLMContextPrecisionWithoutReference(), + "llm_context_precision_with_reference": LLMContextPrecisionWithReference(), + "llm_context_recall": LLMContextRecall(), + "noise_sensitivity": NoiseSensitivity(), + "response_relevancy": ResponseRelevancy(), } + +RAGAS_METRICS_ZH_DICT = { + "context_entity_recall": ContextEntityRecall(), + "factual_correctness": FactualCorrectness(sentence_segmenter=Segmenter(language="zh", clean=True)), + "faithfulness": Faithfulness(sentence_segmenter=Segmenter(language="zh", clean=True)), + "llm_context_precision_without_reference": LLMContextPrecisionWithoutReference(), + "llm_context_precision_with_reference": LLMContextPrecisionWithReference(), + "llm_context_recall": LLMContextRecall(), + "noise_sensitivity": NoiseSensitivity(sentence_segmenter=Segmenter(language="zh", clean=True)), + "response_relevancy": ResponseRelevancy(), +} +
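
For reference, a minimal standalone sketch of how the metric registry introduced above is meant to feed `ragas.evaluate` outside the Gradio demo. It mirrors the column naming and judge-model setup used by `evaluate_rag` in this patch; the sample question, answer, contexts, the `gpt-4o-mini` model name, and the assumption that `OPENAI_API_KEY` is set in the environment are illustrative placeholders, not part of the change itself.

from datasets import Dataset
from langchain_openai.chat_models import ChatOpenAI
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper

from hugegraph_llm.utils.ragas_utils import RAGAS_METRICS_DICT

# Placeholder single-row dataset: question, RAG answer, retrieved contexts, reference answer.
data = {
    "user_input": ["What is HugeGraph?"],
    "response": ["HugeGraph is a fast and highly scalable graph database."],
    "retrieved_contexts": [["HugeGraph is an open-source graph database that supports the Gremlin query language."]],
    "reference": ["HugeGraph is an open-source, high-performance graph database."],
}

# Use LLM-judged metrics only, so no embedding model is needed for this sketch.
metrics = [RAGAS_METRICS_DICT[name] for name in ("faithfulness", "llm_context_recall")]

# Assumes an OpenAI-compatible endpoint and the same judge model that evaluate_rag configures.
eval_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini", temperature=0))

result = evaluate(Dataset.from_dict(data), metrics=metrics, llm=eval_llm)
print(result.to_pandas())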