Skip to content

Commit 10844f0

Browse files
authored
feat: a common util method of formatting history for agent graders. (#42)
1 parent 5af2748 commit 10844f0

File tree

9 files changed

+42
-193
lines changed

9 files changed

+42
-193
lines changed

openjudge/graders/agent/action/action_alignment.py

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
from loguru import logger
1212

13+
from openjudge.graders.agent.utils import format_history
1314
from openjudge.graders.base_grader import GraderMode, GraderScore
1415
from openjudge.graders.llm_grader import LLMGrader
1516
from openjudge.models.base_chat_model import BaseChatModel
@@ -189,29 +190,6 @@ def __init__(
189190
language=language,
190191
)
191192

192-
def _format_history(self, history: Optional[list] = None) -> str:
193-
"""Format history steps for evaluation.
194-
195-
Args:
196-
history: Optional list of previous step dictionaries
197-
198-
Returns:
199-
Formatted history string, or empty string if no history
200-
"""
201-
if not history:
202-
return ""
203-
204-
lines = ["<History Steps>"]
205-
for i, hist_step in enumerate(history):
206-
lines.append(f"Step {i + 1}:")
207-
for key, value in hist_step.items():
208-
if value:
209-
lines.append(f"{key.capitalize()}: {value}")
210-
lines.append("")
211-
lines.append("</History Steps>")
212-
213-
return "\n".join(lines)
214-
215193
async def aevaluate(
216194
self,
217195
plan: str,
@@ -244,7 +222,7 @@ async def aevaluate(
244222
context_str = f"<context>\n{context}\n</context>"
245223

246224
# Format history
247-
history_str = self._format_history(history)
225+
history_str = format_history(history)
248226

249227
try:
250228
result = await super().aevaluate(

openjudge/graders/agent/memory/memory_accuracy.py

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
from loguru import logger
1212

13+
from openjudge.graders.agent.utils import format_history
1314
from openjudge.graders.base_grader import GraderMode, GraderScore
1415
from openjudge.graders.llm_grader import LLMGrader
1516
from openjudge.models.base_chat_model import BaseChatModel
@@ -180,29 +181,6 @@ def __init__(
180181
language=language,
181182
)
182183

183-
def _format_history(self, history: Optional[list] = None) -> str:
184-
"""Format history steps for evaluation.
185-
186-
Args:
187-
history: Optional list of previous step dictionaries
188-
189-
Returns:
190-
Formatted history string, or empty string if no history
191-
"""
192-
if not history:
193-
return ""
194-
195-
lines = ["<History Steps>"]
196-
for i, hist_step in enumerate(history):
197-
lines.append(f"Step {i + 1}:")
198-
for key, value in hist_step.items():
199-
if value:
200-
lines.append(f"{key.capitalize()}: {value}")
201-
lines.append("")
202-
lines.append("</History Steps>")
203-
204-
return "\n".join(lines)
205-
206184
async def aevaluate(
207185
self,
208186
observation: str,
@@ -237,7 +215,7 @@ async def aevaluate(
237215
context_str = f"<context>\n{context}\n</context>"
238216

239217
# Format history
240-
history_str = self._format_history(history)
218+
history_str = format_history(history)
241219

242220
try:
243221
result = await super().aevaluate(

openjudge/graders/agent/memory/memory_detail_preservation.py

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
from loguru import logger
1212

13+
from openjudge.graders.agent.utils import format_history
1314
from openjudge.graders.base_grader import GraderMode, GraderScore
1415
from openjudge.graders.llm_grader import LLMGrader
1516
from openjudge.models.base_chat_model import BaseChatModel
@@ -180,29 +181,6 @@ def __init__(
180181
language=language,
181182
)
182183

183-
def _format_history(self, history: Optional[list] = None) -> str:
184-
"""Format history steps for evaluation.
185-
186-
Args:
187-
history: Optional list of previous step dictionaries
188-
189-
Returns:
190-
Formatted history string, or empty string if no history
191-
"""
192-
if not history:
193-
return ""
194-
195-
lines = ["<History Steps>"]
196-
for i, hist_step in enumerate(history):
197-
lines.append(f"Step {i + 1}:")
198-
for key, value in hist_step.items():
199-
if value:
200-
lines.append(f"{key.capitalize()}: {value}")
201-
lines.append("")
202-
lines.append("</History Steps>")
203-
204-
return "\n".join(lines)
205-
206184
async def aevaluate(
207185
self,
208186
observation: str,
@@ -237,7 +215,7 @@ async def aevaluate(
237215
context_str = f"<context>\n{context}\n</context>"
238216

239217
# Format history
240-
history_str = self._format_history(history)
218+
history_str = format_history(history)
241219

242220
try:
243221
result = await super().aevaluate(

openjudge/graders/agent/memory/memory_retrieval_effectiveness.py

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
from loguru import logger
1212

13+
from openjudge.graders.agent.utils import format_history
1314
from openjudge.graders.base_grader import GraderMode, GraderScore
1415
from openjudge.graders.llm_grader import LLMGrader
1516
from openjudge.models.base_chat_model import BaseChatModel
@@ -183,29 +184,6 @@ def __init__(
183184
language=language,
184185
)
185186

186-
def _format_history(self, history: Optional[list] = None) -> str:
187-
"""Format history steps for evaluation.
188-
189-
Args:
190-
history: Optional list of previous step dictionaries
191-
192-
Returns:
193-
Formatted history string, or empty string if no history
194-
"""
195-
if not history:
196-
return ""
197-
198-
lines = ["<History Steps>"]
199-
for i, hist_step in enumerate(history):
200-
lines.append(f"Step {i + 1}:")
201-
for key, value in hist_step.items():
202-
if value:
203-
lines.append(f"{key.capitalize()}: {value}")
204-
lines.append("")
205-
lines.append("</History Steps>")
206-
207-
return "\n".join(lines)
208-
209187
async def aevaluate(
210188
self,
211189
plan: str,
@@ -243,7 +221,7 @@ async def aevaluate(
243221
context_str = f"<context>\n{context}\n</context>"
244222

245223
# Format history
246-
history_str = self._format_history(history)
224+
history_str = format_history(history)
247225

248226
try:
249227
result = await super().aevaluate(

openjudge/graders/agent/plan/plan_feasibility.py

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
from loguru import logger
1212

13+
from openjudge.graders.agent.utils import format_history
1314
from openjudge.graders.base_grader import GraderMode, GraderScore
1415
from openjudge.graders.llm_grader import LLMGrader
1516
from openjudge.models.base_chat_model import BaseChatModel
@@ -183,29 +184,6 @@ def __init__(
183184
language=language,
184185
)
185186

186-
def _format_history(self, history: Optional[list] = None) -> str:
187-
"""Format history steps for evaluation.
188-
189-
Args:
190-
history: Optional list of previous step dictionaries
191-
192-
Returns:
193-
Formatted history string, or empty string if no history
194-
"""
195-
if not history:
196-
return ""
197-
198-
lines = ["<History Steps>"]
199-
for i, hist_step in enumerate(history):
200-
lines.append(f"Step {i + 1}:")
201-
for key, value in hist_step.items():
202-
if value:
203-
lines.append(f"{key.capitalize()}: {value}")
204-
lines.append("")
205-
lines.append("</History Steps>")
206-
207-
return "\n".join(lines)
208-
209187
async def aevaluate(
210188
self,
211189
plan: str,
@@ -243,7 +221,7 @@ async def aevaluate(
243221
context_str = f"<context>\n{context}\n</context>"
244222

245223
# Format history
246-
history_str = self._format_history(history)
224+
history_str = format_history(history)
247225

248226
try:
249227
result = await super().aevaluate(

openjudge/graders/agent/reflection/reflection_accuracy.py

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
from loguru import logger
1212

13+
from openjudge.graders.agent.utils import format_history
1314
from openjudge.graders.base_grader import GraderMode, GraderScore
1415
from openjudge.graders.llm_grader import LLMGrader
1516
from openjudge.models.base_chat_model import BaseChatModel
@@ -180,29 +181,6 @@ def __init__(
180181
language=language,
181182
)
182183

183-
def _format_history(self, history: Optional[list] = None) -> str:
184-
"""Format history steps for evaluation.
185-
186-
Args:
187-
history: Optional list of previous step dictionaries
188-
189-
Returns:
190-
Formatted history string, or empty string if no history
191-
"""
192-
if not history:
193-
return ""
194-
195-
lines = ["<History Steps>"]
196-
for i, hist_step in enumerate(history):
197-
lines.append(f"Step {i + 1}:")
198-
for key, value in hist_step.items():
199-
if value:
200-
lines.append(f"{key.capitalize()}: {value}")
201-
lines.append("")
202-
lines.append("</History Steps>")
203-
204-
return "\n".join(lines)
205-
206184
async def aevaluate(
207185
self,
208186
observation: str,
@@ -237,7 +215,7 @@ async def aevaluate(
237215
context_str = f"<context>\n{context}\n</context>"
238216

239217
# Format history
240-
history_str = self._format_history(history)
218+
history_str = format_history(history)
241219

242220
try:
243221
result = await super().aevaluate(

openjudge/graders/agent/reflection/reflection_outcome_understanding.py

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
from loguru import logger
1313

14+
from openjudge.graders.agent.utils import format_history
1415
from openjudge.graders.base_grader import GraderMode, GraderScore
1516
from openjudge.graders.llm_grader import LLMGrader
1617
from openjudge.models.base_chat_model import BaseChatModel
@@ -304,29 +305,6 @@ def __init__(
304305
language=language,
305306
)
306307

307-
def _format_history(self, history: Optional[list] = None) -> str:
308-
"""Format history steps for evaluation.
309-
310-
Args:
311-
history: Optional list of previous step dictionaries
312-
313-
Returns:
314-
Formatted history string, or empty string if no history
315-
"""
316-
if not history:
317-
return ""
318-
319-
lines = ["<History Steps>"]
320-
for i, hist_step in enumerate(history):
321-
lines.append(f"Step {i + 1}:")
322-
for key, value in hist_step.items():
323-
if value:
324-
lines.append(f"{key.capitalize()}: {value}")
325-
lines.append("")
326-
lines.append("</History Steps>")
327-
328-
return "\n".join(lines)
329-
330308
async def aevaluate(
331309
self,
332310
observation: str,
@@ -361,7 +339,7 @@ async def aevaluate(
361339
context_str = f"<context>\n{context}\n</context>"
362340

363341
# Format history
364-
history_str = self._format_history(history)
342+
history_str = format_history(history)
365343

366344
try:
367345
result = await super().aevaluate(

openjudge/graders/agent/reflection/reflection_progress_awareness.py

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
from loguru import logger
1313

14+
from openjudge.graders.agent.utils import format_history
1415
from openjudge.graders.base_grader import GraderMode, GraderScore
1516
from openjudge.graders.llm_grader import LLMGrader
1617
from openjudge.models.base_chat_model import BaseChatModel
@@ -221,29 +222,6 @@ def __init__(
221222
language=language,
222223
)
223224

224-
def _format_history(self, history: Optional[list] = None) -> str:
225-
"""Format history steps for evaluation.
226-
227-
Args:
228-
history: Optional list of previous step dictionaries
229-
230-
Returns:
231-
Formatted history string, or empty string if no history
232-
"""
233-
if not history:
234-
return ""
235-
236-
lines = ["<History Steps>"]
237-
for i, hist_step in enumerate(history):
238-
lines.append(f"Step {i + 1}:")
239-
for key, value in hist_step.items():
240-
if value:
241-
lines.append(f"{key.capitalize()}: {value}")
242-
lines.append("")
243-
lines.append("</History Steps>")
244-
245-
return "\n".join(lines)
246-
247225
async def aevaluate(
248226
self,
249227
observation: str,
@@ -278,7 +256,7 @@ async def aevaluate(
278256
context_str = f"<context>\n{context}\n</context>"
279257

280258
# Format history
281-
history_str = self._format_history(history)
259+
history_str = format_history(history)
282260

283261
try:
284262
result = await super().aevaluate(

0 commit comments

Comments
 (0)