diff --git a/openjudge/graders/agent/action/action_alignment.py b/openjudge/graders/agent/action/action_alignment.py
index c083a11a..d8c025a8 100644
--- a/openjudge/graders/agent/action/action_alignment.py
+++ b/openjudge/graders/agent/action/action_alignment.py
@@ -10,6 +10,7 @@
 
 from loguru import logger
 
+from openjudge.graders.agent.utils import format_history
 from openjudge.graders.base_grader import GraderMode, GraderScore
 from openjudge.graders.llm_grader import LLMGrader
 from openjudge.models.base_chat_model import BaseChatModel
@@ -189,29 +190,6 @@ def __init__(
             language=language,
         )
 
-    def _format_history(self, history: Optional[list] = None) -> str:
-        """Format history steps for evaluation.
-
-        Args:
-            history: Optional list of previous step dictionaries
-
-        Returns:
-            Formatted history string, or empty string if no history
-        """
-        if not history:
-            return ""
-
-        lines = ["<History Steps>"]
-        for i, hist_step in enumerate(history):
-            lines.append(f"Step {i + 1}:")
-            for key, value in hist_step.items():
-                if value:
-                    lines.append(f"{key.capitalize()}: {value}")
-            lines.append("")
-        lines.append("</History Steps>")
-
-        return "\n".join(lines)
-
     async def aevaluate(
         self,
         plan: str,
@@ -244,7 +222,7 @@ async def aevaluate(
             context_str = f"<context>\n{context}\n</context>"
 
         # Format history
-        history_str = self._format_history(history)
+        history_str = format_history(history)
 
         try:
             result = await super().aevaluate(
diff --git a/openjudge/graders/agent/memory/memory_accuracy.py b/openjudge/graders/agent/memory/memory_accuracy.py
index dd7067b5..da23a424 100644
--- a/openjudge/graders/agent/memory/memory_accuracy.py
+++ b/openjudge/graders/agent/memory/memory_accuracy.py
@@ -10,6 +10,7 @@
 
 from loguru import logger
 
+from openjudge.graders.agent.utils import format_history
 from openjudge.graders.base_grader import GraderMode, GraderScore
 from openjudge.graders.llm_grader import LLMGrader
 from openjudge.models.base_chat_model import BaseChatModel
@@ -180,29 +181,6 @@ def __init__(
             language=language,
         )
 
-    def _format_history(self, history: Optional[list] = None) -> str:
-        """Format history steps for evaluation.
-
-        Args:
-            history: Optional list of previous step dictionaries
-
-        Returns:
-            Formatted history string, or empty string if no history
-        """
-        if not history:
-            return ""
-
-        lines = ["<History Steps>"]
-        for i, hist_step in enumerate(history):
-            lines.append(f"Step {i + 1}:")
-            for key, value in hist_step.items():
-                if value:
-                    lines.append(f"{key.capitalize()}: {value}")
-            lines.append("")
-        lines.append("</History Steps>")
-
-        return "\n".join(lines)
-
     async def aevaluate(
         self,
         observation: str,
@@ -237,7 +215,7 @@ async def aevaluate(
             context_str = f"<context>\n{context}\n</context>"
 
         # Format history
-        history_str = self._format_history(history)
+        history_str = format_history(history)
 
         try:
             result = await super().aevaluate(
diff --git a/openjudge/graders/agent/memory/memory_detail_preservation.py b/openjudge/graders/agent/memory/memory_detail_preservation.py
index 28b3eb6c..c3c79c83 100644
--- a/openjudge/graders/agent/memory/memory_detail_preservation.py
+++ b/openjudge/graders/agent/memory/memory_detail_preservation.py
@@ -10,6 +10,7 @@
 
 from loguru import logger
 
+from openjudge.graders.agent.utils import format_history
 from openjudge.graders.base_grader import GraderMode, GraderScore
 from openjudge.graders.llm_grader import LLMGrader
 from openjudge.models.base_chat_model import BaseChatModel
@@ -180,29 +181,6 @@ def __init__(
             language=language,
         )
 
-    def _format_history(self, history: Optional[list] = None) -> str:
-        """Format history steps for evaluation.
-
-        Args:
-            history: Optional list of previous step dictionaries
-
-        Returns:
-            Formatted history string, or empty string if no history
-        """
-        if not history:
-            return ""
-
-        lines = ["<History Steps>"]
-        for i, hist_step in enumerate(history):
-            lines.append(f"Step {i + 1}:")
-            for key, value in hist_step.items():
-                if value:
-                    lines.append(f"{key.capitalize()}: {value}")
-            lines.append("")
-        lines.append("</History Steps>")
-
-        return "\n".join(lines)
-
     async def aevaluate(
         self,
         observation: str,
@@ -237,7 +215,7 @@ async def aevaluate(
             context_str = f"<context>\n{context}\n</context>"
 
         # Format history
-        history_str = self._format_history(history)
+        history_str = format_history(history)
 
         try:
             result = await super().aevaluate(
diff --git a/openjudge/graders/agent/memory/memory_retrieval_effectiveness.py b/openjudge/graders/agent/memory/memory_retrieval_effectiveness.py
index 2cb9e5b1..2204706c 100644
--- a/openjudge/graders/agent/memory/memory_retrieval_effectiveness.py
+++ b/openjudge/graders/agent/memory/memory_retrieval_effectiveness.py
@@ -10,6 +10,7 @@
 
 from loguru import logger
 
+from openjudge.graders.agent.utils import format_history
 from openjudge.graders.base_grader import GraderMode, GraderScore
 from openjudge.graders.llm_grader import LLMGrader
 from openjudge.models.base_chat_model import BaseChatModel
@@ -183,29 +184,6 @@ def __init__(
             language=language,
         )
 
-    def _format_history(self, history: Optional[list] = None) -> str:
-        """Format history steps for evaluation.
-
-        Args:
-            history: Optional list of previous step dictionaries
-
-        Returns:
-            Formatted history string, or empty string if no history
-        """
-        if not history:
-            return ""
-
-        lines = ["<History Steps>"]
-        for i, hist_step in enumerate(history):
-            lines.append(f"Step {i + 1}:")
-            for key, value in hist_step.items():
-                if value:
-                    lines.append(f"{key.capitalize()}: {value}")
-            lines.append("")
-        lines.append("</History Steps>")
-
-        return "\n".join(lines)
-
     async def aevaluate(
         self,
         plan: str,
@@ -243,7 +221,7 @@ async def aevaluate(
             context_str = f"<context>\n{context}\n</context>"
 
         # Format history
-        history_str = self._format_history(history)
+        history_str = format_history(history)
 
         try:
             result = await super().aevaluate(
diff --git a/openjudge/graders/agent/plan/plan_feasibility.py b/openjudge/graders/agent/plan/plan_feasibility.py
index ebf4dd68..c69686e3 100644
--- a/openjudge/graders/agent/plan/plan_feasibility.py
+++ b/openjudge/graders/agent/plan/plan_feasibility.py
@@ -10,6 +10,7 @@
 
 from loguru import logger
 
+from openjudge.graders.agent.utils import format_history
 from openjudge.graders.base_grader import GraderMode, GraderScore
 from openjudge.graders.llm_grader import LLMGrader
 from openjudge.models.base_chat_model import BaseChatModel
@@ -183,29 +184,6 @@ def __init__(
             language=language,
         )
 
-    def _format_history(self, history: Optional[list] = None) -> str:
-        """Format history steps for evaluation.
-
-        Args:
-            history: Optional list of previous step dictionaries
-
-        Returns:
-            Formatted history string, or empty string if no history
-        """
-        if not history:
-            return ""
-
-        lines = ["<History Steps>"]
-        for i, hist_step in enumerate(history):
-            lines.append(f"Step {i + 1}:")
-            for key, value in hist_step.items():
-                if value:
-                    lines.append(f"{key.capitalize()}: {value}")
-            lines.append("")
-        lines.append("</History Steps>")
-
-        return "\n".join(lines)
-
     async def aevaluate(
         self,
         plan: str,
@@ -243,7 +221,7 @@ async def aevaluate(
             context_str = f"<context>\n{context}\n</context>"
 
         # Format history
-        history_str = self._format_history(history)
+        history_str = format_history(history)
 
         try:
             result = await super().aevaluate(
diff --git a/openjudge/graders/agent/reflection/reflection_accuracy.py b/openjudge/graders/agent/reflection/reflection_accuracy.py
index 296cfab7..8b54d462 100644
--- a/openjudge/graders/agent/reflection/reflection_accuracy.py
+++ b/openjudge/graders/agent/reflection/reflection_accuracy.py
@@ -10,6 +10,7 @@
 
 from loguru import logger
 
+from openjudge.graders.agent.utils import format_history
 from openjudge.graders.base_grader import GraderMode, GraderScore
 from openjudge.graders.llm_grader import LLMGrader
 from openjudge.models.base_chat_model import BaseChatModel
@@ -180,29 +181,6 @@ def __init__(
             language=language,
         )
 
-    def _format_history(self, history: Optional[list] = None) -> str:
-        """Format history steps for evaluation.
-
-        Args:
-            history: Optional list of previous step dictionaries
-
-        Returns:
-            Formatted history string, or empty string if no history
-        """
-        if not history:
-            return ""
-
-        lines = ["<History Steps>"]
-        for i, hist_step in enumerate(history):
-            lines.append(f"Step {i + 1}:")
-            for key, value in hist_step.items():
-                if value:
-                    lines.append(f"{key.capitalize()}: {value}")
-            lines.append("")
-        lines.append("</History Steps>")
-
-        return "\n".join(lines)
-
     async def aevaluate(
         self,
         observation: str,
@@ -237,7 +215,7 @@ async def aevaluate(
             context_str = f"<context>\n{context}\n</context>"
 
         # Format history
-        history_str = self._format_history(history)
+        history_str = format_history(history)
 
         try:
             result = await super().aevaluate(
diff --git a/openjudge/graders/agent/reflection/reflection_outcome_understanding.py b/openjudge/graders/agent/reflection/reflection_outcome_understanding.py
index 6a15b2ef..e7ce341f 100644
--- a/openjudge/graders/agent/reflection/reflection_outcome_understanding.py
+++ b/openjudge/graders/agent/reflection/reflection_outcome_understanding.py
@@ -11,6 +11,7 @@
 
 from loguru import logger
 
+from openjudge.graders.agent.utils import format_history
 from openjudge.graders.base_grader import GraderMode, GraderScore
 from openjudge.graders.llm_grader import LLMGrader
 from openjudge.models.base_chat_model import BaseChatModel
@@ -304,29 +305,6 @@ def __init__(
             language=language,
         )
 
-    def _format_history(self, history: Optional[list] = None) -> str:
-        """Format history steps for evaluation.
-
-        Args:
-            history: Optional list of previous step dictionaries
-
-        Returns:
-            Formatted history string, or empty string if no history
-        """
-        if not history:
-            return ""
-
-        lines = ["<History Steps>"]
-        for i, hist_step in enumerate(history):
-            lines.append(f"Step {i + 1}:")
-            for key, value in hist_step.items():
-                if value:
-                    lines.append(f"{key.capitalize()}: {value}")
-            lines.append("")
-        lines.append("</History Steps>")
-
-        return "\n".join(lines)
-
     async def aevaluate(
         self,
         observation: str,
@@ -361,7 +339,7 @@ async def aevaluate(
             context_str = f"<context>\n{context}\n</context>"
 
         # Format history
-        history_str = self._format_history(history)
+        history_str = format_history(history)
 
         try:
             result = await super().aevaluate(
diff --git a/openjudge/graders/agent/reflection/reflection_progress_awareness.py b/openjudge/graders/agent/reflection/reflection_progress_awareness.py
index 896b2cb3..8d1d0f34 100644
--- a/openjudge/graders/agent/reflection/reflection_progress_awareness.py
+++ b/openjudge/graders/agent/reflection/reflection_progress_awareness.py
@@ -11,6 +11,7 @@
 
 from loguru import logger
 
+from openjudge.graders.agent.utils import format_history
 from openjudge.graders.base_grader import GraderMode, GraderScore
 from openjudge.graders.llm_grader import LLMGrader
 from openjudge.models.base_chat_model import BaseChatModel
@@ -221,29 +222,6 @@ def __init__(
             language=language,
         )
 
-    def _format_history(self, history: Optional[list] = None) -> str:
-        """Format history steps for evaluation.
-
-        Args:
-            history: Optional list of previous step dictionaries
-
-        Returns:
-            Formatted history string, or empty string if no history
-        """
-        if not history:
-            return ""
-
-        lines = ["<History Steps>"]
-        for i, hist_step in enumerate(history):
-            lines.append(f"Step {i + 1}:")
-            for key, value in hist_step.items():
-                if value:
-                    lines.append(f"{key.capitalize()}: {value}")
-            lines.append("")
-        lines.append("</History Steps>")
-
-        return "\n".join(lines)
-
     async def aevaluate(
         self,
         observation: str,
@@ -278,7 +256,7 @@ async def aevaluate(
             context_str = f"<context>\n{context}\n</context>"
 
         # Format history
-        history_str = self._format_history(history)
+        history_str = format_history(history)
 
         try:
             result = await super().aevaluate(
diff --git a/openjudge/graders/agent/utils.py b/openjudge/graders/agent/utils.py
index 32b76038..17af90a4 100644
--- a/openjudge/graders/agent/utils.py
+++ b/openjudge/graders/agent/utils.py
@@ -4,7 +4,7 @@
 This module provides utility functions for analyzing agent action behaviors,
 including action-observation pair extraction and similarity calculations.
 """
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 
 from loguru import logger
 from sklearn.feature_extraction.text import TfidfVectorizer
@@ -147,8 +147,33 @@ def calculate_semantic_similarity(text1: str, text2: str) -> float:
         return 0.0
 
 
+def format_history(history: Optional[list] = None) -> str:
+    """Render previous agent steps as a ``<History Steps>`` text block.
+
+    Args:
+        history: Optional list of step dicts; each is read via ``.items()``, keys get capitalized
+
+    Returns:
+        Newline-joined block with one "Step N:" section per entry, or an empty string if no history
+    """
+    if not history:  # covers both None and an empty list
+        return ""
+
+    lines = ["<History Steps>"]
+    for i, hist_step in enumerate(history):
+        lines.append(f"Step {i + 1}:")  # step numbers shown are 1-based
+        for key, value in hist_step.items():
+            if value:  # falsy values (None, "", 0, False, empty containers) are omitted
+                lines.append(f"{key.capitalize()}: {value}")
+        lines.append("")  # blank separator line after every step, including the last
+    lines.append("</History Steps>")
+
+    return "\n".join(lines)
+
+
 __all__ = [
     "extract_action_observation_pairs",
     "calculate_text_similarity",
     "calculate_semantic_similarity",
+    "format_history",
 ]