diff --git a/hugegraph-llm/requirements.txt b/hugegraph-llm/requirements.txt index e10cb22f6..8631b85db 100644 --- a/hugegraph-llm/requirements.txt +++ b/hugegraph-llm/requirements.txt @@ -14,3 +14,4 @@ python-dotenv>=1.0.1 pyarrow~=17.0.0 # TODO: a temporary dependency for pandas, figure out why ImportError pandas~=2.2.2 openpyxl~=3.1.5 +git+https://github.com/jasinliu/ragas.git@patch-2 # TODO: wait for release diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py index 912b064f1..5057c9465 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py @@ -90,9 +90,9 @@ def init_rag_ui() -> gr.Interface: with gr.Tab(label="1. Build RAG Index 💡"): textbox_input_schema, textbox_info_extract_template = create_vector_graph_block() - with gr.Tab(label="2. (Graph)RAG & User Functions 📖"): + with gr.Tab(label="2,3. (Graph)RAG & User Functions 📖"): textbox_inp, textbox_answer_prompt_input = create_rag_block() - with gr.Tab(label="3. Others Tools 🚧"): + with gr.Tab(label="4. Others Tools 🚧"): create_other_block() diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py index 66c9b19f3..63c425714 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py @@ -17,29 +17,35 @@ # pylint: disable=E1101 +import json import os -from typing import Tuple, Literal, Optional +from typing import List, Literal, Optional, Tuple import gradio as gr import pandas as pd +from datasets import Dataset from gradio.utils import NamedString +from langchain_openai.chat_models import ChatOpenAI +from ragas import evaluate +from ragas.llms import LangchainLLMWrapper -from hugegraph_llm.config import resource_path, prompt +from hugegraph_llm.config import prompt, resource_path, settings from hugegraph_llm.operators.graph_rag_task import RAGPipeline from hugegraph_llm.utils.log import log +from hugegraph_llm.utils.ragas_utils import RAGAS_METRICS_DICT, RAGAS_METRICS_ZH_DICT def rag_answer( - text: str, - raw_answer: bool, - vector_only_answer: bool, - graph_only_answer: bool, - graph_vector_answer: bool, - graph_ratio: float, - rerank_method: Literal["bleu", "reranker"], - near_neighbor_first: bool, - custom_related_information: str, - answer_prompt: str, + text: str, + raw_answer: bool, + vector_only_answer: bool, + graph_only_answer: bool, + graph_vector_answer: bool, + graph_ratio: float, + rerank_method: Literal["bleu", "reranker"], + near_neighbor_first: bool, + custom_related_information: str, + answer_prompt: str, ) -> Tuple: """ Generate an answer using the RAG (Retrieval-Augmented Generation) pipeline. 
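For reference (not part of the patch): after this hunk rag_answer() keeps its public signature, but the per-method switches (raw / vector-only / graph-only / graph-vector) are now threaded through the pipeline context instead of synthesize_answer(), and the next hunk extends the return value with a contexts dict. A minimal sketch of a direct call, with placeholder values rather than values taken from this patch (the function is normally wired to the Gradio "Answer Question" button):

    raw, vector_only, graph_only, graph_vector, contexts = rag_answer(
        text="Who is Sarah?",                # hypothetical question
        raw_answer=True,
        vector_only_answer=True,
        graph_only_answer=True,
        graph_vector_answer=True,
        graph_ratio=0.5,
        rerank_method="bleu",
        near_neighbor_first=False,
        custom_related_information="",
        answer_prompt=prompt.answer_prompt,  # default template from hugegraph_llm.config
    )
    # 'contexts' carries the retrieved evidence per method, e.g.
    # {"vector_contexts": [...], "graph_contexts": [...], "graph_vector_contexts": [...]},
    # and is what the batch-evaluation path below serializes into questions_answers.xlsx.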
@@ -69,17 +75,29 @@ def rag_answer( rag.extract_keywords().keywords_to_vid().query_graphdb() # TODO: add more user-defined search strategies rag.merge_dedup_rerank(graph_ratio, rerank_method, near_neighbor_first, custom_related_information) - rag.synthesize_answer(raw_answer, vector_only_answer, graph_only_answer, graph_vector_answer, answer_prompt) + rag.synthesize_answer(answer_prompt) try: - context = rag.run(verbose=True, query=text, vector_search=vector_search, graph_search=graph_search) + context = rag.run( + verbose=True, + query=text, + raw_answer=raw_answer, + vector_only_answer=vector_only_answer, + graph_only_answer=graph_only_answer, + graph_vector_answer=graph_vector_answer, + ) if context.get("switch_to_bleu"): gr.Warning("Online reranker fails, automatically switches to local bleu rerank.") return ( - context.get("raw_answer", ""), - context.get("vector_only_answer", ""), - context.get("graph_only_answer", ""), - context.get("graph_vector_answer", ""), + context.get("raw_answer_result", ""), + context.get("vector_only_answer_result", ""), + context.get("graph_only_answer_result", ""), + context.get("graph_vector_answer_result", ""), + { + "vector_contexts": context.get("vector_contexts"), + "graph_contexts": context.get("graph_contexts"), + "graph_vector_contexts": context.get("graph_vector_contexts"), + }, ) except ValueError as e: log.critical(e) @@ -124,9 +142,7 @@ def toggle_slider(enable): ) graph_ratio = gr.Slider(0, 1, 0.5, label="Graph Ratio", step=0.1, interactive=False) - graph_vector_radio.change( - toggle_slider, inputs=graph_vector_radio, outputs=graph_ratio - ) # pylint: disable=no-member + graph_vector_radio.change(toggle_slider, inputs=graph_vector_radio, outputs=graph_ratio) # pylint: disable=no-member near_neighbor_first = gr.Checkbox( value=False, label="Near neighbor first(Optional)", @@ -135,6 +151,10 @@ def toggle_slider(enable): custom_related_information = gr.Text( prompt.custom_rerank_info, label="Custom related information(Optional)", + info=( + "Used for rerank, can increase the weight of knowledge related to it, such as `law`. " + "Multiple values can be separated by commas." + ), ) btn = gr.Button("Answer Question", variant="primary") @@ -160,34 +180,46 @@ def toggle_slider(enable): > 2. Upload the file & click the button to generate answers. (Preview shows the first 40 lines) > 3. 
The answer options are the same as the above RAG/Q&A frame """) + + # TODO: Replace string with python constant tests_df_headers = [ "Question", - "Expected Answer", - "Basic LLM Answer", - "Vector-only Answer", "Graph-only Answer", "Graph-Vector Answer", + "Vector-only Answer", + "Basic LLM Answer", + "Expected Answer", ] + rag_answer_header_dict = { + "Vector-only Answer": "Vector Contexts", + "Graph-only Answer": "Graph Contexts", + "Graph-Vector Answer": "Graph-Vector Contexts", + } + answers_path = os.path.join(resource_path, "demo", "questions_answers.xlsx") questions_path = os.path.join(resource_path, "demo", "questions.xlsx") questions_template_path = os.path.join(resource_path, "demo", "questions_template.xlsx") + ragas_metrics_list = list(RAGAS_METRICS_DICT.keys()) + def read_file_to_excel(file: NamedString, line_count: Optional[int] = None): - df = None + if os.path.exists(answers_path): + os.remove(answers_path) + df = pd.DataFrame() if not file: return pd.DataFrame(), 1 if file.name.endswith(".xlsx"): df = pd.read_excel(file.name, nrows=line_count) if file else pd.DataFrame() elif file.name.endswith(".csv"): df = pd.read_csv(file.name, nrows=line_count) if file else pd.DataFrame() - df.to_excel(questions_path, index=False) - if df.empty: - df = pd.DataFrame([[""] * len(tests_df_headers)], columns=tests_df_headers) else: - df.columns = tests_df_headers + raise gr.Error("Only support .xlsx and .csv files.") + df.to_excel(questions_path, index=False) # truncate the dataframe if it's too long if len(df) > 40: return df.head(40), 40 + if len(df) == 0: + gr.Warning("No data in the file.") return df, len(df) def change_showing_excel(line_count): @@ -216,7 +248,7 @@ def several_rag_answer( total_rows = len(df) for index, row in df.iterrows(): question = row.iloc[0] - basic_llm_answer, vector_only_answer, graph_only_answer, graph_vector_answer = rag_answer( + llm_answer, vector_only_answer, graph_only_answer, graph_vector_answer, contexts = rag_answer( question, is_raw_answer, is_vector_only_answer, @@ -228,18 +260,30 @@ def several_rag_answer( custom_related_information, answer_prompt, ) - df.at[index, "Basic LLM Answer"] = basic_llm_answer - df.at[index, "Vector-only Answer"] = vector_only_answer - df.at[index, "Graph-only Answer"] = graph_only_answer - df.at[index, "Graph-Vector Answer"] = graph_vector_answer + df.at[index, "Basic LLM Answer"] = llm_answer if llm_answer else None + df.at[index, "Vector-only Answer"] = vector_only_answer if vector_only_answer else None + df.at[index, "Graph-only Answer"] = graph_only_answer if graph_only_answer else None + df.at[index, "Graph-Vector Answer"] = graph_vector_answer if graph_vector_answer else None + if "Vector Contexts" not in df.columns: + df["Vector Contexts"] = None + df["Graph Contexts"] = None + df["Graph-Vector Contexts"] = None + df.at[index, "Vector Contexts"] = contexts.get("vector_contexts") + df.at[index, "Graph Contexts"] = contexts.get("graph_contexts") + df.at[index, "Graph-Vector Contexts"] = contexts.get("graph_vector_contexts") progress((index + 1, total_rows)) - answers_path = os.path.join(resource_path, "demo", "questions_answers.xlsx") + + df = df.dropna(axis=1, how="all") + df_to_show = df[[col for col in tests_df_headers if col in df.columns]] + for rag_context_header in rag_answer_header_dict.values(): + if rag_context_header in df.columns: + df[rag_context_header] = df[rag_context_header].apply(lambda x: json.dumps(x, ensure_ascii=False)) df.to_excel(answers_path, index=False) - return 
df.head(answer_max_line_count), answers_path + return df_to_show.head(answer_max_line_count), answers_path with gr.Row(): with gr.Column(): - questions_file = gr.File(file_types=[".xlsx", ".csv"], label="Questions File (.xlsx & csv)") + questions_file = gr.File(file_types=[".xlsx", ".csv"], label="Questions File (.xlsx & .csv)") with gr.Column(): test_template_file = os.path.join(resource_path, "demo", "questions_template.xlsx") gr.File(value=test_template_file, label="Download Template File") @@ -265,4 +309,67 @@ def several_rag_answer( ) questions_file.change(read_file_to_excel, questions_file, [qa_dataframe, answer_max_line_count]) answer_max_line_count.change(change_showing_excel, answer_max_line_count, qa_dataframe) - return inp, answer_prompt_input \ No newline at end of file + + def evaluate_rag(metrics: List[str], num: int, language: Literal["english", "chinese"]): + answers_df = pd.read_excel(answers_path) + answers_df = answers_df.head(num) + if not any(answers_df.columns.isin(rag_answer_header_dict)): + raise gr.Error("No RAG answers found in the answer file.") + if language == "chinese": + eval_metrics = [RAGAS_METRICS_ZH_DICT[metric] for metric in metrics] + else: + eval_metrics = [RAGAS_METRICS_DICT[metric] for metric in metrics] + rag_method_names = [answer for answer in rag_answer_header_dict if answer in answers_df.columns] + score_df = pd.DataFrame() + + for answer in rag_method_names: + context_header = rag_answer_header_dict[answer] + answers_df[context_header] = answers_df[context_header].apply(json.loads) + rag_data = { + "user_input": answers_df["Question"].to_list(), + "response": answers_df[answer].to_list(), + "retrieved_contexts": answers_df[rag_answer_header_dict[answer]].to_list(), + "reference": answers_df["Expected Answer"].to_list(), + } + eval_llm = LangchainLLMWrapper( + ChatOpenAI( + model="gpt-4o-mini", + temperature=0, + base_url=settings.openai_api_base, + api_key=settings.openai_api_key, + ) + ) + + dataset = Dataset.from_dict(rag_data) + score = evaluate( + dataset, + metrics=eval_metrics, + llm=eval_llm, + ) + score_df = pd.concat([score_df, score.to_pandas()]) + score_df.insert(0, "method", rag_method_names) + return score_df + + with gr.Row(): + with gr.Column(): + ragas_metrics = gr.Dropdown( + choices=ragas_metrics_list, + value=ragas_metrics_list[:4], + multiselect=True, + label="Metrics", + info=( + "Several evaluation metrics from `ragas`, ", + "please refer to https://docs.ragas.io/en/stable/concepts/metrics/index.html", + ), + ) + with gr.Column(): + with gr.Row(): + dataset_nums = gr.Number(1, label="Dataset Numbers", minimum=1, maximum=1) + language = gr.Radio(["english", "chinese"], label="Language", value="chinese") + ragas_btn = gr.Button("Evaluate RAG", variant="primary") + ragas_btn.click( + evaluate_rag, + inputs=[ragas_metrics, dataset_nums, language], + outputs=[gr.DataFrame(label="RAG Evaluation Results", headers=ragas_metrics_list)], + ) + return inp, answer_prompt_input diff --git a/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py b/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py index c4ff757bd..ce447e16b 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py @@ -66,21 +66,21 @@ def run(self, context: Dict[str, Any]) -> Dict[str, Any]: if self.custom_related_information: query = query + self.custom_related_information context["graph_ratio"] = self.graph_ratio - 
vector_search = context.get("vector_search", False) - graph_search = context.get("graph_search", False) - if graph_search and vector_search: - graph_length = int(self.topk * self.graph_ratio) - vector_length = self.topk - graph_length - else: - graph_length = self.topk - vector_length = self.topk - + + raw_answer = context.get("raw_answer", False) + vector_only_answer = context.get("vector_only_answer", False) + graph_only_answer = context.get("graph_only_answer", False) + graph_vector_answer = context.get("graph_vector_answer", False) + + if raw_answer and not (vector_only_answer or graph_only_answer or graph_vector_answer): + return context + vector_result = context.get("vector_result", []) - vector_length = min(len(vector_result), vector_length) + vector_length = min(len(vector_result), self.topk) vector_result = self._dedup_and_rerank(query, vector_result, vector_length) graph_result = context.get("graph_result", []) - graph_length = min(len(graph_result), graph_length) + graph_length = min(len(graph_result), self.topk) if self.near_neighbor_first: graph_result = self._rerank_with_vertex_degree( query, @@ -94,12 +94,17 @@ def run(self, context: Dict[str, Any]) -> Dict[str, Any]: else: graph_result = self._dedup_and_rerank(query, graph_result, graph_length) + context["graph_rerank_length"] = min(graph_length, int(self.topk * self.graph_ratio)) + context["vector_rerank_length"] = min(vector_length, self.topk - int(self.topk * self.graph_ratio)) + context["vector_result"] = vector_result context["graph_result"] = graph_result return context def _dedup_and_rerank(self, query: str, results: List[str], topn: int) -> List[str]: + if topn == 0: + return [] results = list(set(results)) if self.method == "bleu": return _bleu_rerank(query, results)[:topn] @@ -116,6 +121,9 @@ def _rerank_with_vertex_degree( vertex_degree_list: Optional[List[List[str]]], knowledge_with_degree: Dict[str, List[str]], ) -> List[str]: + if topn == 0: + return [] + if vertex_degree_list is None or len(vertex_degree_list) == 0: return self._dedup_and_rerank(query, results, topn) diff --git a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py index e6da8e095..86ed0b904 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py @@ -169,28 +169,16 @@ def merge_dedup_rerank( def synthesize_answer( self, - raw_answer: bool = False, - vector_only_answer: bool = True, - graph_only_answer: bool = False, - graph_vector_answer: bool = False, answer_prompt: Optional[str] = None, ): """ Add an answer synthesis operator to the pipeline. - :param raw_answer: Whether to return raw answers. - :param vector_only_answer: Whether to return vector-only answers. - :param graph_only_answer: Whether to return graph-only answers. - :param graph_vector_answer: Whether to return graph-vector combined answers. :param answer_prompt: Template for the answer synthesis prompt. :return: Self-instance for chaining. 
""" self._operators.append( AnswerSynthesize( - raw_answer=raw_answer, - vector_only_answer=vector_only_answer, - graph_only_answer=graph_only_answer, - graph_vector_answer=graph_vector_answer, prompt_template=answer_prompt, ) ) diff --git a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py index baf61e647..a8ef6b6f3 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py @@ -18,7 +18,7 @@ # pylint: disable=W0621 import asyncio -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional from hugegraph_llm.config import prompt from hugegraph_llm.models.llms.base import BaseLLM @@ -33,19 +33,33 @@ DEFAULT_ANSWER_TEMPLATE = prompt.answer_prompt +def _get_vector_result_str(vector_result: List[str]) -> str: + if vector_result: + return "Phrases related to the query:\n" + "\n".join( + f"{i + 1}. {res}" for i, res in enumerate(vector_result) + ) + no_vector_data_msg = "No (vector)phrase related to the query." + log.warning(no_vector_data_msg) + return no_vector_data_msg + +def _get_graph_result_str(graph_context_head: str, graph_result: List[str]) -> str: + if graph_result: + return graph_context_head + "\n".join( + f"{i + 1}. {res}" for i, res in enumerate(graph_result) + ) + no_graph_data_msg = "No related graph data found for current query." + log.warning(no_graph_data_msg) + return no_graph_data_msg + class AnswerSynthesize: def __init__( - self, - llm: Optional[BaseLLM] = None, - prompt_template: Optional[str] = None, - question: Optional[str] = None, - context_body: Optional[str] = None, - context_head: Optional[str] = None, - context_tail: Optional[str] = None, - raw_answer: bool = False, - vector_only_answer: bool = True, - graph_only_answer: bool = False, - graph_vector_answer: bool = False, + self, + llm: Optional[BaseLLM] = None, + prompt_template: Optional[str] = None, + question: Optional[str] = None, + context_body: Optional[str] = None, + context_head: Optional[str] = None, + context_tail: Optional[str] = None, ): self._llm = llm self._prompt_template = prompt_template or DEFAULT_ANSWER_TEMPLATE @@ -53,10 +67,6 @@ def __init__( self._context_body = context_body self._context_head = context_head self._context_tail = context_tail - self._raw_answer = raw_answer - self._vector_only_answer = vector_only_answer - self._graph_only_answer = graph_only_answer - self._graph_vector_answer = graph_vector_answer def run(self, context: Dict[str, Any]) -> Dict[str, Any]: if self._llm is None: @@ -66,98 +76,148 @@ def run(self, context: Dict[str, Any]) -> Dict[str, Any]: self._question = context.get("query") or None assert self._question is not None, "No question for synthesizing." 
- context_head_str = context.get("synthesize_context_head") or self._context_head or "" - context_tail_str = context.get("synthesize_context_tail") or self._context_tail or "" + context_head_str = ( + context.get("synthesize_context_head") or self._context_head or "" + ) + context_tail_str = ( + context.get("synthesize_context_tail") or self._context_tail or "" + ) if self._context_body is not None: - context_str = (f"{context_head_str}\n" - f"{self._context_body}\n" - f"{context_tail_str}".strip("\n")) + context_str = ( + f"{context_head_str}\n" + f"{self._context_body}\n" + f"{context_tail_str}".strip("\n") + ) - final_prompt = self._prompt_template.format(context_str=context_str, query_str=self._question) + final_prompt = self._prompt_template.format( + context_str=context_str, query_str=self._question + ) response = self._llm.generate(prompt=final_prompt) return {"answer": response} - vector_result = context.get("vector_result") - if vector_result: - vector_result_context = "Phrases related to the query:\n" + "\n".join( - f"{i + 1}. {res}" for i, res in enumerate(vector_result) - ) - else: - vector_result_context = "No (vector)phrase related to the query." - - graph_result = context.get("graph_result") - if graph_result: - graph_context_head = context.get("graph_context_head", "Knowledge from graphdb for the query:\n") - graph_result_context = graph_context_head + "\n".join( - f"{i + 1}. {res}" for i, res in enumerate(graph_result) + context = asyncio.run( + self.async_generate( + context, + context_head_str, + context_tail_str ) - else: - graph_result_context = "No related graph data found for current query." - log.warning(graph_result_context) - - context = asyncio.run(self.async_generate(context, context_head_str, context_tail_str, - vector_result_context, graph_result_context)) + ) return context - async def async_generate(self, context: Dict[str, Any], context_head_str: str, - context_tail_str: str, vector_result_context: str, - graph_result_context: str): + async def async_generate( + self, + context: Dict[str, Any], + context_head_str: str, + context_tail_str: str + ): # pylint: disable=R0912 (too-many-branches) verbose = context.get("verbose") or False # TODO: replace task_cache with a better name task_cache = {} - if self._raw_answer: + + raw_answer = context.get("raw_answer", False) + vector_only_answer = context.get("vector_only_answer", False) + graph_only_answer = context.get("graph_only_answer", False) + graph_vector_answer = context.get("graph_vector_answer", False) + + if raw_answer: final_prompt = self._question - task_cache["raw_task"] = asyncio.create_task(self._llm.agenerate(prompt=final_prompt)) - if self._vector_only_answer: - context_str = (f"{context_head_str}\n" - f"{vector_result_context}\n" - f"{context_tail_str}".strip("\n")) - - final_prompt = self._prompt_template.format(context_str=context_str, query_str=self._question) - task_cache["vector_only_task"] = asyncio.create_task(self._llm.agenerate(prompt=final_prompt)) - if self._graph_only_answer: - context_str = (f"{context_head_str}\n" - f"{graph_result_context}\n" - f"{context_tail_str}".strip("\n")) - - final_prompt = self._prompt_template.format(context_str=context_str, query_str=self._question) - task_cache["graph_only_task"] = asyncio.create_task(self._llm.agenerate(prompt=final_prompt)) - if self._graph_vector_answer: + task_cache["raw_task"] = asyncio.create_task( + self._llm.agenerate(prompt=final_prompt) + ) + if vector_only_answer: + vector_result = context.get("vector_result") + 
vector_result_context = _get_vector_result_str(vector_result) + + context_str = ( + f"{context_head_str}\n" + f"{vector_result_context}\n" + f"{context_tail_str}".strip("\n") + ) + context["vector_contexts"] = vector_result + + final_prompt = self._prompt_template.format( + context_str=context_str, query_str=self._question + ) + task_cache["vector_only_task"] = asyncio.create_task( + self._llm.agenerate(prompt=final_prompt) + ) + if graph_only_answer: + + graph_result = context.get("graph_result") + graph_context_head = context.get( + "graph_context_head", "Knowledge from graphdb for the query:\n" + ) + graph_result_context = _get_graph_result_str(graph_context_head, graph_result) + + context_str = ( + f"{context_head_str}\n" + f"{graph_result_context}\n" + f"{context_tail_str}".strip("\n") + ) + context["graph_contexts"] = graph_result + + final_prompt = self._prompt_template.format( + context_str=context_str, query_str=self._question + ) + task_cache["graph_only_task"] = asyncio.create_task( + self._llm.agenerate(prompt=final_prompt) + ) + if graph_vector_answer: + vector_result = context.get("vector_result") + vector_rerank_length = context.get("vector_rerank_length") + vector_result_context = _get_vector_result_str(vector_result[:vector_rerank_length]) + + graph_result = context.get("graph_result") + graph_rerank_length = context.get("graph_rerank_length") + graph_context_head = context.get( + "graph_context_head", "Knowledge from graphdb for the query:\n" + ) + graph_result_context = _get_graph_result_str(graph_context_head, graph_result[:graph_rerank_length]) context_body_str = f"{vector_result_context}\n{graph_result_context}" + + context["graph_vector_contexts"] = vector_result[:vector_rerank_length] + graph_result[:graph_rerank_length] + if context.get("graph_ratio", 0.5) < 0.5: context_body_str = f"{graph_result_context}\n{vector_result_context}" - context_str = (f"{context_head_str}\n" - f"{context_body_str}\n" - f"{context_tail_str}".strip("\n")) + context_str = ( + f"{context_head_str}\n" + f"{context_body_str}\n" + f"{context_tail_str}".strip("\n") + ) - final_prompt = self._prompt_template.format(context_str=context_str, query_str=self._question) + final_prompt = self._prompt_template.format( + context_str=context_str, query_str=self._question + ) + task_cache["graph_vector_task"] = asyncio.create_task( self._llm.agenerate(prompt=final_prompt) ) # TODO: use log.debug instead of print if task_cache.get("raw_task"): response = await task_cache["raw_task"] - context["raw_answer"] = response + context["raw_answer_result"] = response if verbose: print(f"\033[91mANSWER: {response}\033[0m") if task_cache.get("vector_only_task"): response = await task_cache["vector_only_task"] - context["vector_only_answer"] = response + context["vector_only_answer_result"] = response if verbose: print(f"\033[91mANSWER: {response}\033[0m") if task_cache.get("graph_only_task"): response = await task_cache["graph_only_task"] - context["graph_only_answer"] = response + context["graph_only_answer_result"] = response if verbose: print(f"\033[91mANSWER: {response}\033[0m") if task_cache.get("graph_vector_task"): response = await task_cache["graph_vector_task"] - context["graph_vector_answer"] = response + context["graph_vector_answer_result"] = response if verbose: print(f"\033[91mANSWER: {response}\033[0m") - ops = sum([self._raw_answer, self._vector_only_answer, self._graph_only_answer, self._graph_vector_answer]) - context['call_count'] = context.get('call_count', 0) + ops + ops = sum( + [raw_answer, 
vector_only_answer, graph_only_answer, graph_vector_answer] + ) + context["call_count"] = context.get("call_count", 0) + ops return context diff --git a/hugegraph-llm/src/hugegraph_llm/resources/demo/questions_template.xlsx b/hugegraph-llm/src/hugegraph_llm/resources/demo/questions_template.xlsx index deb70c121..9b630f7de 100644 Binary files a/hugegraph-llm/src/hugegraph_llm/resources/demo/questions_template.xlsx and b/hugegraph-llm/src/hugegraph_llm/resources/demo/questions_template.xlsx differ diff --git a/hugegraph-llm/src/hugegraph_llm/utils/ragas_utils.py b/hugegraph-llm/src/hugegraph_llm/utils/ragas_utils.py new file mode 100644 index 000000000..f0535b386 --- /dev/null +++ b/hugegraph-llm/src/hugegraph_llm/utils/ragas_utils.py @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pysbd import Segmenter +from ragas.metrics import ( + ContextEntityRecall, + FactualCorrectness, + Faithfulness, + LLMContextPrecisionWithoutReference, + LLMContextPrecisionWithReference, + LLMContextRecall, + NoiseSensitivity, + ResponseRelevancy, +) + +RAGAS_METRICS_DICT = { + "context_entity_recall": ContextEntityRecall(), + "factual_correctness": FactualCorrectness(), + "faithfulness": Faithfulness(), + "llm_context_precision_without_reference": LLMContextPrecisionWithoutReference(), + "llm_context_precision_with_reference": LLMContextPrecisionWithReference(), + "llm_context_recall": LLMContextRecall(), + "noise_sensitivity": NoiseSensitivity(), + "response_relevancy": ResponseRelevancy(), +} + +RAGAS_METRICS_ZH_DICT = { + "context_entity_recall": ContextEntityRecall(), + "factual_correctness": FactualCorrectness(sentence_segmenter=Segmenter(language="zh", clean=True)), + "faithfulness": Faithfulness(sentence_segmenter=Segmenter(language="zh", clean=True)), + "llm_context_precision_without_reference": LLMContextPrecisionWithoutReference(), + "llm_context_precision_with_reference": LLMContextPrecisionWithReference(), + "llm_context_recall": LLMContextRecall(), + "noise_sensitivity": NoiseSensitivity(sentence_segmenter=Segmenter(language="zh", clean=True)), + "response_relevancy": ResponseRelevancy(), +} + diff --git a/hugegraph-python-client/requirements.txt b/hugegraph-python-client/requirements.txt index d7a8148a0..c4df41c62 100644 --- a/hugegraph-python-client/requirements.txt +++ b/hugegraph-python-client/requirements.txt @@ -1,4 +1,4 @@ decorator==5.1.1 -requests==2.32.0 +requests~=2.32.0 setuptools==70.0.0 urllib3==2.2.2
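For reference (not part of the patch): a minimal end-to-end sketch of the evaluation path added above, mirroring evaluate_rag() for a single RAG method. The imports and identifiers all come from this diff; the question, answers, and retrieved contexts are hypothetical placeholders, and the judge LLM simply reuses the gpt-4o-mini configuration hard-coded in evaluate_rag():

    from datasets import Dataset
    from langchain_openai.chat_models import ChatOpenAI
    from ragas import evaluate
    from ragas.llms import LangchainLLMWrapper

    from hugegraph_llm.config import settings
    from hugegraph_llm.utils.ragas_utils import RAGAS_METRICS_DICT

    # One entry per question; retrieved_contexts is the JSON-decoded list stored in
    # the "Graph-Vector Contexts" column of questions_answers.xlsx.
    rag_data = {
        "user_input": ["Who is Sarah?"],                      # hypothetical question
        "response": ["Sarah is James' friend."],              # Graph-Vector Answer
        "retrieved_contexts": [["Sarah--[friend]-->James"]],  # hypothetical context list
        "reference": ["Sarah is a friend of James."],         # Expected Answer
    }

    eval_llm = LangchainLLMWrapper(
        ChatOpenAI(
            model="gpt-4o-mini",
            temperature=0,
            base_url=settings.openai_api_base,
            api_key=settings.openai_api_key,
        )
    )

    metrics = [RAGAS_METRICS_DICT[name] for name in ("faithfulness", "llm_context_recall")]
    score = evaluate(Dataset.from_dict(rag_data), metrics=metrics, llm=eval_llm)
    print(score.to_pandas())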