Skip to content

Commit d722e38

Browse files
authored
feat: update example code and template processing in agent and common graders (#49)
* feat: update example code and template processing in agent and common graders
* feat: fix grader template long text format issue
1 parent aa2415a commit d722e38

16 files changed

+113
-71
lines changed

openjudge/graders/agent/action/action_alignment.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ class ActionAlignmentGrader(LLMGrader):
165165
>>> result = asyncio.run(grader.aevaluate(
166166
... plan="I will open drawer 1 to find the key.",
167167
... action="open drawer 1"
168-
... )
168+
... ))
169169
>>> print(f"Score: {result.score}") # Expected: 1.0
170170
"""
171171

openjudge/graders/agent/action/action_loop.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,11 @@ class ActionLoopDetectionGrader(BaseGrader):
2121
all pairs of actions for similarity and penalizing based on the proportion
2222
of similar action pairs found.
2323
Example:
24+
>>> import asyncio
2425
>>> grader = ActionLoopDetectionGrader(similarity_threshold=1.0)
25-
>>> result = await grader.aevaluate(
26+
>>> result = asyncio.run(grader.aevaluate(
2627
... messages=[...],
27-
... )
28+
... ))
2829
>>> print(f"Loop detection score: {result.score}")
2930
"""
3031

openjudge/graders/agent/memory/memory_accuracy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ class MemoryAccuracyGrader(LLMGrader):
165165
>>> result = asyncio.run(grader.aevaluate(
166166
... observation="You see a closed cabinet.",
167167
... memory="The cabinet is closed."
168-
... )
168+
... ))
169169
>>> print(f"Score: {result.score}") # Expected: 1.0
170170
"""
171171

openjudge/graders/agent/memory/memory_detail_preservation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ class MemoryDetailPreservationGrader(LLMGrader):
165165
>>> result = asyncio.run(grader.aevaluate(
166166
... observation="Cabinet 1 at coordinates (3.5, 2.1) contains 5 red apples.",
167167
... memory="Cabinet 1 at (3.5, 2.1) has 5 red apples."
168-
... )
168+
... ))
169169
>>> print(f"Score: {result.score}") # Expected: 1.0
170170
"""
171171

openjudge/graders/agent/memory/memory_retrieval_effectiveness.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ class MemoryRetrievalEffectivenessGrader(LLMGrader):
167167
>>> result = asyncio.run(grader.aevaluate(
168168
... observation="You see a closed cabinet.",
169169
... memory="The cabinet is closed."
170-
... )
170+
... ))
171171
>>> print(f"Score: {result.score}") # Expected: 1.0
172172
"""
173173

openjudge/graders/agent/observation/observation_information_gain.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,11 @@ class ObservationInformationGainGrader(BaseGrader):
2323
Attributes:
2424
similarity_threshold: Threshold for considering observations as redundant
2525
Example:
26+
>>> import asyncio
2627
>>> grader = ObservationInformationGainGrader(similarity_threshold=0.5)
27-
>>> result = await grader.aevaluate(
28+
>>> result = asyncio.run(grader.aevaluate(
2829
... messages=[...], # List of message dicts
29-
... )
30+
... ))
3031
>>> print(f"Info gain score: {result.score}")
3132
"""
3233

openjudge/graders/agent/plan/plan_feasibility.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ class PlanFeasibilityGrader(LLMGrader):
168168
... plan="I will first open the drawer to get the key, then use it to unlock the door.",
169169
... observation="The drawer is closed. You don't have any items.",
170170
... memory="The key is inside the drawer."
171-
... )
171+
... ))
172172
>>> print(f"Score: {result.score}") # Expected: 1.0
173173
"""
174174

openjudge/graders/agent/reflection/reflection_accuracy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ class ReflectionAccuracyGrader(LLMGrader):
165165
>>> result = asyncio.run(grader.aevaluate(
166166
... observation="You see a closed cabinet.",
167167
... reflection="I observed a closed cabinet."
168-
... )
168+
... ))
169169
>>> print(f"Score: {result.score}") # Expected: 1.0
170170
"""
171171

openjudge/graders/agent/reflection/reflection_outcome_understanding.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ class ReflectionOutcomeUnderstandingGrader(LLMGrader):
289289
>>> result = asyncio.run(grader.aevaluate(
290290
... observation="The drawer is now open.",
291291
... reflection="I successfully opened the drawer."
292-
... )
292+
... ))
293293
>>> print(f"Score: {result.score}") # Expected: 1.0
294294
"""
295295

openjudge/graders/agent/reflection/reflection_progress_awareness.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ class ReflectionProgressAwarenessGrader(LLMGrader):
206206
... observation="Cabinet 1 now has apples. Task complete.",
207207
... reflection="Good progress! I've successfully found the apples.",
208208
... context="Task: Find apples in cabinets"
209-
... )
209+
... ))
210210
>>> print(f"Score: {result.score}") # Expected: 1.0
211211
"""
212212

0 commit comments

Comments
 (0)