6 changes: 4 additions & 2 deletions pyproject.toml
@@ -1,12 +1,14 @@
[project]
name = "green-agent-template"
name = "debate-judge"
version = "0.1.0"
description = "A template for A2A green agents"
description = "A2A agent that orchestrates and judges debate"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"a2a-sdk[http-server]>=0.3.20",
"google-genai>=1.55.0",
"pydantic>=2.12.5",
"python-dotenv>=1.2.1",
"uvicorn>=0.38.0",
]

187 changes: 164 additions & 23 deletions src/agent.py
@@ -1,27 +1,48 @@
from typing import Any
import logging
from typing import Any, Literal
from pydantic import BaseModel, HttpUrl, ValidationError
from dotenv import load_dotenv

from a2a.server.tasks import TaskUpdater
from a2a.types import Message, TaskState, Part, TextPart, DataPart
from a2a.utils import get_message_text, new_agent_text_message
from google import genai

from messenger import Messenger


load_dotenv()
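# load_dotenv() pulls variables from a local .env file; genai.Client() further down is then
# expected to find an API key in the environment (e.g. GEMINI_API_KEY or GOOGLE_API_KEY).
# The key setup itself is assumed and is not part of this diff.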

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("debate_judge")


class EvalRequest(BaseModel):
"""Request format sent by the AgentBeats platform to green agents."""
participants: dict[str, HttpUrl] # role -> agent URL
config: dict[str, Any]

class DebaterScore(BaseModel):
emotional_appeal: float
argument_clarity: float
argument_arrangement: float
relevance_to_topic: float
total_score: float

class DebateEval(BaseModel):
pro_debater: DebaterScore
con_debater: DebaterScore
winner: Literal["pro_debater", "con_debater"]
reason: str
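# Illustrative shape of the structured evaluation (values are made up):
# {
#   "pro_debater": {"emotional_appeal": 0.8, "argument_clarity": 0.9,
#                   "argument_arrangement": 0.7, "relevance_to_topic": 0.9, "total_score": 3.3},
#   "con_debater": {"emotional_appeal": 0.6, "argument_clarity": 0.7,
#                   "argument_arrangement": 0.8, "relevance_to_topic": 0.8, "total_score": 2.9},
#   "winner": "pro_debater",
#   "reason": "The pro side argued more clearly and stayed closer to the topic."
# }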


class Agent:
# Fill in: list of required participant roles, e.g. ["pro_debater", "con_debater"]
required_roles: list[str] = []
# Fill in: list of required config keys, e.g. ["topic", "num_rounds"]
required_config_keys: list[str] = []
required_roles: list[str] = ["pro_debater", "con_debater"]
required_config_keys: list[str] = ["topic", "num_rounds"]

def __init__(self):
self.messenger = Messenger()
# Initialize other state here
self.client = genai.Client()

def validate_request(self, request: EvalRequest) -> tuple[bool, str]:
missing_roles = set(self.required_roles) - set(request.participants.keys())
@@ -32,19 +53,14 @@ def validate_request(self, request: EvalRequest) -> tuple[bool, str]:
if missing_config_keys:
return False, f"Missing config keys: {missing_config_keys}"

# Add additional request validation here
try:
int(request.config["num_rounds"])
except Exception as e:
return False, f"Can't parse num_rounds: {e}"

return True, "ok"

async def run(self, message: Message, updater: TaskUpdater) -> None:
"""Implement your agent logic here.

Args:
message: The incoming message
updater: Report progress (update_status) and results (add_artifact)

Use self.messenger.talk_to_agent(message, url) to call other agents.
"""
input_text = get_message_text(message)

try:
@@ -57,19 +73,144 @@ async def run(self, message: Message, updater: TaskUpdater) -> None:
await updater.reject(new_agent_text_message(f"Invalid request: {e}"))
return

# Replace example code below with your agent logic
# Use request.participants to get participant agent URLs by role
# Use request.config for assessment parameters
await updater.update_status(
TaskState.working,
new_agent_text_message(f"Starting assessment.\n{request.model_dump_json()}")
)

debate = await self.orchestrate_debate(
request.participants, request.config["topic"], int(request.config["num_rounds"]), updater
)

debate_text = ""
for i, (pro, con) in enumerate(
zip(debate["pro_debater"], debate["con_debater"]), start=1
):
debate_text += f"Pro Argument {i}: {pro}\n"
debate_text += f"Con Argument {i}: {con}\n"

await updater.update_status(
TaskState.working, new_agent_text_message("Thinking...")
TaskState.working,
new_agent_text_message(f"Debate orchestration finished. Starting evaluation.")
)
logger.info("Debate orchestration finished. Evaluating debate.")

debate_eval: DebateEval = await self.judge_debate(request.config["topic"], debate_text)
logger.info(f"Debate Evaluation:\n{debate_eval.model_dump_json()}")

await updater.add_artifact(
parts=[
Part(root=TextPart(text="The agent performed well.")),
Part(root=DataPart(data={
# structured assessment results
}))
Part(root=TextPart(text=debate_eval.reason)),
Part(root=DataPart(data=debate_eval.model_dump())),
],
name="Result",
)

async def orchestrate_debate(
self,
participants: dict[str, HttpUrl],
topic: str,
num_rounds: int,
updater: TaskUpdater,
) -> dict[str, list[str]]:
debate: dict[str, list[str]] = {"pro_debater": [], "con_debater": []}

async def turn(role: str, prompt: str) -> str:
response = await self.messenger.talk_to_agent(
prompt, str(participants[role]), new_conversation=False
)
logger.info(f"{role}: {response}")
debate[role].append(response)
await updater.update_status(
TaskState.working, new_agent_text_message(f"{role}: {response}")
)
return response

# Opening turns
response = await turn(
"pro_debater", f"Debate Topic: {topic}. Present your opening argument."
)
response = await turn(
"con_debater",
f"Debate Topic: {topic}. Present your opening argument. Your opponent opened with: {response}",
)

# Remaining rounds
for _ in range(num_rounds - 1):
response = await turn(
"pro_debater",
f"Your opponent said: {response}. Present your next argument.",
)
response = await turn(
"con_debater",
f"Your opponent said: {response}. Present your next argument.",
)

return debate
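# Resulting transcript structure (illustrative):
# {"pro_debater": ["opening argument...", "second argument..."],
#  "con_debater": ["opening argument...", "second argument..."]}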

async def judge_debate(self, topic: str, debate_text: str) -> DebateEval:
# prompt adapted from InspireScore: https://github.com/fywang12/InspireDebate/blob/main/inspirescore.py

system_prompt = """
You are an experienced debate judge tasked with evaluating debates. For each debate, you will assess both sides based on four key criteria: Emotional Appeal, Clarity of Argument and Reasoning, Logical Arrangement of Arguments, and Relevance to Debate Topic.

For each of the four subdimensions, provide a score from 0 to 1 (with 0 being the lowest and 1 being the highest) for both the **Pro (Affirmative)** side and the **Con (Negative)** side. Additionally, provide a brief analysis for both sides for each subdimension.

Scoring Criteria:
1. **Emotional Appeal**
- How effectively does each side connect with the audience emotionally? Does the argument evoke empathy, passion, or values?
- **0**: No emotional appeal. The argument feels cold or disconnected.
- **1**: Highly engaging emotionally, strongly connects with the audience.

2. **Clarity of Argument and Reasoning**
- Are the arguments clearly presented? Is the reasoning sound and easy to follow?
- **0**: The arguments are unclear or confusing.
- **1**: The arguments are well-structured and easy to understand.

3. **Logical Arrangement of Arguments**
- Is the argument presented in a logical, coherent manner? Does each point flow into the next without confusion?
- **0**: The arguments are disorganized and difficult to follow.
- **1**: The arguments follow a clear and logical progression.

4. **Relevance to Debate Topic**
- Does each argument directly address the debate topic? Are there any irrelevant points or off-topic distractions?
- **0**: Arguments that stray far from the topic.
- **1**: Every argument is focused and relevant to the topic.

Please output the result in the following format:

1. **Pro (Affirmative Side) Score**:
- Emotional Appeal: [score]
- Argument Clarity: [score]
- Argument Arrangement: [score]
- Relevance to Debate Topic: [score]
- **Total Score**: [total score]

2. **Con (Negative Side) Score**:
- Emotional Appeal: [score]
- Argument Clarity: [score]
- Argument Arrangement: [score]
- Relevance to Debate Topic: [score]
- **Total Score**: [total score]

3. **Winner**: [Pro/Con]
4. **Reason**: [Provide detailed analysis based on the scores]
"""

user_prompt = f"""
Evaluate the debate on the topic: '{topic}'
Debate analysis process and arguments are as follows:
{debate_text}
Provide a JSON formatted response with scores and comments for each criterion for both debaters.
"""

response = self.client.models.generate_content(
model="gemini-2.5-flash-lite",
config=genai.types.GenerateContentConfig(
system_instruction=system_prompt,
response_mime_type="application/json",
response_schema=DebateEval,
),
contents=user_prompt,
)
return response.parsed
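
A minimal local smoke test for the judge step might look like the sketch below. It is an assumption, not part of this diff: it presumes a Gemini API key is available in the environment, that it is run from src/ so that agent.py is importable, and the two-line transcript is invented purely for illustration.

import asyncio

from agent import Agent


async def main():
    # Agent() builds the Messenger and the Gemini client from environment configuration
    agent = Agent()
    transcript = (
        "Pro Argument 1: Regulation builds public trust in AI systems.\n"
        "Con Argument 1: Regulation risks slowing beneficial innovation.\n"
    )
    # judge_debate returns a parsed DebateEval instance via the structured-output schema
    result = await agent.judge_debate("Should artificial intelligence be regulated?", transcript)
    print(result.model_dump_json(indent=2))


if __name__ == "__main__":
    asyncio.run(main())
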
28 changes: 18 additions & 10 deletions src/server.py
@@ -20,20 +20,28 @@ def main():
parser.add_argument("--card-url", type=str, help="URL to advertise in the agent card")
args = parser.parse_args()

# Fill in your agent card
# See: https://a2a-protocol.org/latest/tutorials/python/3-agent-skills-and-card/

skill = AgentSkill(
id="",
name="",
description="",
tags=[],
examples=[]
id="moderate_and_judge_debate",
name="Orchestrates and judges debate",
description="Orchestrate and judge a debate between two agents on a given topic.",
tags=["debate"],
examples=["""
{
"participants": {
"pro_debater": "https://pro-debater.example.com:443",
"con_debater": "https://con-debater.example.org:8443"
},
"config": {
"topic": "Should artificial intelligence be regulated?",
"num_rounds": 3
}
}
"""]
)

agent_card = AgentCard(
name="",
description="",
name="Debate Judge",
description="Orchestrate and judge a structured debate between pro and con agents on a given topic with multiple rounds of arguments.",
url=args.card_url or f"http://{args.host}:{args.port}/",
version='1.0.0',
default_input_modes=['text'],