Skip to content

Commit 50515db

Browse files
committed
Implement debate judge
1 parent 88da0f4 commit 50515db

File tree

4 files changed

+302
-65
lines changed

4 files changed

+302
-65
lines changed

pyproject.toml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
[project]
2-
name = "green-agent-template"
2+
name = "debate-judge"
33
version = "0.1.0"
4-
description = "A template for A2A green agents"
4+
description = "A2A agent that orchestrates and judges debate"
55
readme = "README.md"
66
requires-python = ">=3.13"
77
dependencies = [
88
"a2a-sdk[http-server]>=0.3.20",
9+
"google-genai>=1.55.0",
910
"pydantic>=2.12.5",
11+
"python-dotenv>=1.2.1",
1012
"uvicorn>=0.38.0",
1113
]
1214

src/agent.py

Lines changed: 164 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,48 @@
1-
from typing import Any
1+
import logging
2+
from typing import Any, Literal
23
from pydantic import BaseModel, HttpUrl, ValidationError
4+
from dotenv import load_dotenv
5+
36
from a2a.server.tasks import TaskUpdater
47
from a2a.types import Message, TaskState, Part, TextPart, DataPart
58
from a2a.utils import get_message_text, new_agent_text_message
9+
from google import genai
610

711
from messenger import Messenger
812

913

14+
load_dotenv()
15+
16+
logging.basicConfig(level=logging.INFO)
17+
logger = logging.getLogger("debate_judge")
18+
19+
1020
class EvalRequest(BaseModel):
    """Request format sent by the AgentBeats platform to green agents."""

    # role -> agent URL; pydantic validates each value as a well-formed URL
    participants: dict[str, HttpUrl]
    # free-form assessment parameters (this agent expects topic / num_rounds)
    config: dict[str, Any]
1424

25+
class DebaterScore(BaseModel):
    """One debater's scores, one value per judging subdimension.

    The judging prompt asks for each subdimension on a 0-1 scale; the
    values here are whatever the judge model reports (not re-validated).
    """

    emotional_appeal: float
    argument_clarity: float
    argument_arrangement: float
    relevance_to_topic: float
    # Sum of the four subdimensions as reported by the judge model;
    # it is not recomputed or cross-checked here.
    total_score: float
31+
32+
class DebateEval(BaseModel):
    """Structured verdict for a whole debate, as produced by the judge model."""

    pro_debater: DebaterScore
    con_debater: DebaterScore
    # Which side won; constrained to the two participant role names.
    winner: Literal["pro_debater", "con_debater"]
    # The judge's written justification for the scores and the verdict.
    reason: str
37+
1538

1639
class Agent:
    """Green agent that orchestrates a pro/con debate and judges the result."""

    # Participant roles every EvalRequest must supply (checked in validate_request).
    required_roles: list[str] = ["pro_debater", "con_debater"]
    # Config keys every EvalRequest must supply (checked in validate_request).
    required_config_keys: list[str] = ["topic", "num_rounds"]
2142

2243
def __init__(self):
    # Messenger handles A2A calls to the participant (debater) agents.
    self.messenger = Messenger()
    # genai.Client() picks up credentials from the environment (loaded by
    # load_dotenv() at module import) — presumably GOOGLE_API_KEY; confirm.
    self.client = genai.Client()
2546

2647
def validate_request(self, request: EvalRequest) -> tuple[bool, str]:
2748
missing_roles = set(self.required_roles) - set(request.participants.keys())
@@ -32,19 +53,14 @@ def validate_request(self, request: EvalRequest) -> tuple[bool, str]:
3253
if missing_config_keys:
3354
return False, f"Missing config keys: {missing_config_keys}"
3455

35-
# Add additional request validation here
56+
try:
57+
int(request.config["num_rounds"])
58+
except Exception as e:
59+
return False, f"Can't parse num_rounds: {e}"
3660

3761
return True, "ok"
3862

3963
async def run(self, message: Message, updater: TaskUpdater) -> None:
40-
"""Implement your agent logic here.
41-
42-
Args:
43-
message: The incoming message
44-
updater: Report progress (update_status) and results (add_artifact)
45-
46-
Use self.messenger.talk_to_agent(message, url) to call other agents.
47-
"""
4864
input_text = get_message_text(message)
4965

5066
try:
@@ -57,19 +73,144 @@ async def run(self, message: Message, updater: TaskUpdater) -> None:
5773
await updater.reject(new_agent_text_message(f"Invalid request: {e}"))
5874
return
5975

60-
# Replace example code below with your agent logic
61-
# Use request.participants to get participant agent URLs by role
62-
# Use request.config for assessment parameters
76+
await updater.update_status(
77+
TaskState.working,
78+
new_agent_text_message(f"Starting assessment.\n{request.model_dump_json()}")
79+
)
80+
81+
debate = await self.orchestrate_debate(
82+
request.participants, request.config["topic"], request.config["num_rounds"], updater
83+
)
84+
85+
debate_text = ""
86+
for i, (pro, con) in enumerate(
87+
zip(debate["pro_debater"], debate["con_debater"]), start=1
88+
):
89+
debate_text += f"Pro Argument {i}: {pro}\n"
90+
debate_text += f"Con Argument {i}: {con}\n"
6391

6492
await updater.update_status(
65-
TaskState.working, new_agent_text_message("Thinking...")
93+
TaskState.working,
94+
new_agent_text_message(f"Debate orchestration finished. Starting evaluation.")
6695
)
96+
logger.info("Debate orchestration finished. Evaluating debate.")
97+
98+
debate_eval: DebateEval = await self.judge_debate(request.config["topic"], debate_text)
99+
logger.info(f"Debate Evaluation:\n{debate_eval.model_dump_json()}")
100+
67101
await updater.add_artifact(
68102
parts=[
69-
Part(root=TextPart(text="The agent performed well.")),
70-
Part(root=DataPart(data={
71-
# structured assessment results
72-
}))
103+
Part(root=TextPart(text=debate_eval.reason)),
104+
Part(root=DataPart(data=debate_eval.model_dump())),
73105
],
74106
name="Result",
75107
)
108+
109+
async def orchestrate_debate(
    self,
    participants: dict[str, str],
    topic: str,
    num_rounds: int,
    updater: TaskUpdater,
) -> dict[str, list[str]]:
    """Run a multi-round debate between the pro and con debater agents.

    Args:
        participants: Maps role ("pro_debater" / "con_debater") to agent URL.
        topic: The debate topic given to both debaters.
        num_rounds: Total number of rounds (opening turn counts as round 1).
        updater: Streams each turn back to the platform as a working-status update.

    Returns:
        Mapping of role to that debater's arguments in turn order.
    """
    # validate_request only checks that int(num_rounds) *parses*; the raw
    # config value may still be a str (run() passes it through untouched),
    # so coerce before the arithmetic in range(num_rounds - 1) below.
    num_rounds = int(num_rounds)

    debate: dict[str, list[str]] = {"pro_debater": [], "con_debater": []}

    async def turn(role: str, prompt: str) -> str:
        # new_conversation=False keeps each debater's conversation alive so
        # it retains the context of its own earlier turns.
        response = await self.messenger.talk_to_agent(
            prompt, str(participants[role]), new_conversation=False
        )
        logger.info(f"{role}: {response}")
        debate[role].append(response)
        await updater.update_status(
            TaskState.working, new_agent_text_message(f"{role}: {response}")
        )
        return response

    # Opening turns: pro opens blind; con responds to pro's opener.
    response = await turn(
        "pro_debater", f"Debate Topic: {topic}. Present your opening argument."
    )
    response = await turn(
        "con_debater",
        f"Debate Topic: {topic}. Present your opening argument. Your opponent opened with: {response}",
    )

    # Remaining rounds: each side rebuts the opponent's latest argument.
    for _ in range(num_rounds - 1):
        response = await turn(
            "pro_debater",
            f"Your opponent said: {response}. Present your next argument.",
        )
        response = await turn(
            "con_debater",
            f"Your opponent said: {response}. Present your next argument.",
        )

    return debate
150+
151+
async def judge_debate(self, topic: str, debate_text: str) -> DebateEval:
    """Score a finished debate with Gemini and return the structured verdict.

    Args:
        topic: The debate topic.
        debate_text: The transcript of numbered pro/con arguments.

    Returns:
        A DebateEval with per-subdimension scores for both sides, the
        winner, and the judge's written reasoning.

    Raises:
        ValueError: If the model's response cannot be parsed into DebateEval.
    """
    # prompt adapted from InspireScore: https://github.com/fywang12/InspireDebate/blob/main/inspirescore.py

    system_prompt = """
You are an experienced debate judge tasked with evaluating debates. For each debate, you will assess both sides based on four key criteria: Emotional Appeal, Clarity of Argument and Reasoning, Logical Arrangement of Arguments, and Relevance to Debate Topic.

For each of the four subdimensions, provide a score from 0 to 1 (with 0 being the lowest and 1 being the highest) for both the **Pro (Affirmative)** side and the **Con (Negative)** side. Additionally, provide a brief analysis for both sides for each subdimension.

Scoring Criteria:
1. **Emotional Appeal**
   - How effectively does each side connect with the audience emotionally? Does the argument evoke empathy, passion, or values?
   - **0**: No emotional appeal. The argument feels cold or disconnected.
   - **1**: Highly engaging emotionally, strongly connects with the audience.

2. **Clarity of Argument and Reasoning**
   - Are the arguments clearly presented? Is the reasoning sound and easy to follow?
   - **0**: The arguments are unclear or confusing.
   - **1**: The arguments are well-structured and easy to understand.

3. **Logical Arrangement of Arguments**
   - Is the argument presented in a logical, coherent manner? Does each point flow into the next without confusion?
   - **0**: The arguments are disorganized and difficult to follow.
   - **1**: The arguments follow a clear and logical progression.

4. **Relevance to Debate Topic**
   - Does each argument directly address the debate topic? Are there any irrelevant points or off-topic distractions?
   - **0**: Arguments that stray far from the topic.
   - **1**: Every argument is focused and relevant to the topic.

Please output the result in the following format:

1. **Pro (Affirmative Side) Score**:
   - Emotional Appeal: [score]
   - Argument Clarity: [score]
   - Argument Arrangement: [score]
   - Relevance to Debate Topic: [score]
   - **Total Score**: [total score]

2. **Con (Negative Side) Score**:
   - Emotional Appeal: [score]
   - Argument Clarity: [score]
   - Argument Arrangement: [score]
   - Relevance to Debate Topic: [score]
   - **Total Score**: [total score]

3. **Winner**: [Pro/Con]
4. **Reason**: [Provide detailed analysis based on the scores]
"""

    user_prompt = f"""
Evaluate the debate on the topic: '{topic}'
Debate analysis process and arguments are as follows:
{debate_text}
Provide a JSON formatted response with scores and comments for each criterion for both debaters.
"""

    # Use the async client surface (client.aio) so the event loop is not
    # blocked while the model generates: the original sync
    # client.models.generate_content call inside this async method would
    # stall the whole server for the duration of the request.
    response = await self.client.aio.models.generate_content(
        model="gemini-2.5-flash-lite",
        config=genai.types.GenerateContentConfig(
            system_instruction=system_prompt,
            response_mime_type="application/json",
            response_schema=DebateEval,
        ),
        contents=user_prompt,
    )
    # response.parsed is None when the output does not match the schema;
    # fail loudly instead of returning None typed as DebateEval.
    if response.parsed is None:
        raise ValueError(f"Model did not return a parseable DebateEval: {response.text!r}")
    return response.parsed

src/server.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,28 @@ def main():
2020
parser.add_argument("--card-url", type=str, help="URL to advertise in the agent card")
2121
args = parser.parse_args()
2222

23-
# Fill in your agent card
24-
# See: https://a2a-protocol.org/latest/tutorials/python/3-agent-skills-and-card/
25-
2623
skill = AgentSkill(
27-
id="",
28-
name="",
29-
description="",
30-
tags=[],
31-
examples=[]
24+
id="moderate_and_judge_debate",
25+
name="Orchestrates and judges debate",
26+
description="Orchestrate and judge a debate between two agents on a given topic.",
27+
tags=["debate"],
28+
examples=["""
29+
{
30+
"participants": {
31+
"pro_debater": "https://pro-debater.example.com:443",
32+
"con_debater": "https://con-debater.example.org:8443"
33+
},
34+
"config": {
35+
"topic": "Should artificial intelligence be regulated?",
36+
"num_rounds": 3
37+
}
38+
}
39+
"""]
3240
)
3341

3442
agent_card = AgentCard(
35-
name="",
36-
description="",
43+
name="Debate Judge",
44+
description="Orchestrate and judge a structured debate between pro and con agents on a given topic with multiple rounds of arguments.",
3745
url=args.card_url or f"http://{args.host}:{args.port}/",
3846
version='1.0.0',
3947
default_input_modes=['text'],

0 commit comments

Comments
 (0)