Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions examples/research_with_messages/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# Research with Messages

Example demonstrating the use of `task_messages` with multimodal content in OpenAI format.

## Description

This example shows how to send messages with images to SGR Agent Core API. The API accepts messages in OpenAI format, allowing you to include multimodal content (text and images) in your requests.

## Prerequisites

1. SGR Agent Core API server must be running:

```bash
sgr --config-file examples/sgr_deep_research/config.yaml
```

2. The server should be accessible at `http://localhost:8010/v1`

## Usage

Run the example script:

```bash
python examples/research_with_messages/research_with_messages.py
```

> **Reviewer note:** Consider renaming this example from `research_with_messages` to something like `research_with_images`, since that more accurately reflects what is happening — and update all other references accordingly.

## Example: Message with Image

The example demonstrates sending a message with both text and an image:

```python
from openai import OpenAI
import base64
from pathlib import Path

client = OpenAI(base_url="http://localhost:8010/v1", api_key="dummy")


def encode_image(image_path: str) -> str:
with open(image_path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode("utf-8")


image_path = Path(__file__).parent / "sgr_concept.png"
base64_image = encode_image(str(image_path))

response = client.chat.completions.create(
model="custom_research_agent",
messages=[
{
"role": "user",
"content": [
{
"type": "text",
"text": "This is the SGR Agent Core architecture diagram. Explain how Schema-Guided Reasoning works based on this diagram.",
},
{
"type": "image_url",
"image_url": {"url": f"data:image/png;base64,{base64_image}"},
},
],
},
],
stream=True,
)

for chunk in response:
if chunk.choices[0].delta.content:
print(chunk.choices[0].delta.content, end="")
```

## How It Works

1. **Image Encoding**: The image file is read and encoded to base64 format
2. **Multimodal Content**: The message content is a list containing both text and image parts
3. **Message Format**: The message follows OpenAI's multimodal message format with `type: "text"` and `type: "image_url"`
4. **Agent Processing**: The agent receives the complete message including the image and can analyze it

## Message Format

Messages follow OpenAI's `ChatCompletionMessageParam` format:

- `role`: One of `"system"`, `"user"`, `"assistant"`, or `"tool"`
- `content`: Can be:
- A string for text-only messages
- A list of content parts for multimodal messages:
- `{"type": "text", "text": "..."}` for text
- `{"type": "image_url", "image_url": {"url": "..."}}` for images
- Optional fields: `name`, `tool_calls`, `tool_call_id`

## Notes

- Images must be base64-encoded and prefixed with the data URI scheme (`data:image/png;base64,`)
- The agent receives all messages as-is in `task_messages`
- Prompts are added as separate messages at the end of the context
- All message content is preserved and passed to the agent
Empty file.
42 changes: 42 additions & 0 deletions examples/research_with_messages/research_with_messages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import base64
from pathlib import Path

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8010/v1", api_key="dummy")


def encode_image(image_path: str) -> str:
    """Return the contents of the file at *image_path* as a base64 string.

    The file is read in binary mode and the resulting base64 bytes are
    decoded to a plain UTF-8 ``str``, suitable for a ``data:`` URI.
    """
    raw_bytes = Path(image_path).read_bytes()
    return base64.b64encode(raw_bytes).decode("utf-8")


# Locate the bundled architecture diagram next to this script and inline it
# as a base64 string for embedding in a data URI.
image_path = Path(__file__).parent / "sgr_concept.png"
base64_image = encode_image(str(image_path))

# The text part of the prompt, shown alongside the image.
prompt_text = (
    "This is the SGR Agent Core architecture diagram. "
    "Explain how Schema-Guided Reasoning works based on this diagram."
)

# A single multimodal user message in OpenAI's content-parts format:
# one "text" part plus one "image_url" part carrying the data URI.
user_message = {
    "role": "user",
    "content": [
        {"type": "text", "text": prompt_text},
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{base64_image}"},
        },
    ],
}

# Stream the agent's response and echo each delta to stdout as it arrives.
response = client.chat.completions.create(
    model="custom_research_agent",
    messages=[user_message],
    stream=True,
)

for chunk in response:
    delta_text = chunk.choices[0].delta.content
    if delta_text:
        print(delta_text, end="")
Binary file added examples/research_with_messages/sgr_concept.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
7 changes: 4 additions & 3 deletions sgr_agent_core/agent_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import httpx
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionMessageParam

from sgr_agent_core.agent_config import GlobalConfig
from sgr_agent_core.agent_definition import AgentDefinition, LLMConfig
Expand Down Expand Up @@ -40,12 +41,12 @@ def _create_client(cls, llm_config: LLMConfig) -> AsyncOpenAI:
return AsyncOpenAI(**client_kwargs)

@classmethod
async def create(cls, agent_def: AgentDefinition, task: str) -> Agent:
async def create(cls, agent_def: AgentDefinition, task_messages: list[ChatCompletionMessageParam]) -> Agent:
"""Create an agent instance from a definition.

Args:
agent_def: Agent definition with configuration (classes already resolved)
task: Task for the agent to execute
task_messages: Task messages in OpenAI ChatCompletionMessageParam format

Returns:
Created agent instance
Expand Down Expand Up @@ -88,7 +89,7 @@ async def create(cls, agent_def: AgentDefinition, task: str) -> Agent:

try:
agent = BaseClass(
task=task,
task_messages=task_messages,
def_name=agent_def.name,
toolkit=tools,
openai_client=cls._create_client(agent_def.llm),
Expand Down
4 changes: 2 additions & 2 deletions sgr_agent_core/agents/sgr_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ class SGRAgent(BaseAgent):

def __init__(
self,
task: str,
task_messages: list,
openai_client: AsyncOpenAI,
agent_config: AgentConfig,
toolkit: list[Type[BaseTool]],
def_name: str | None = None,
**kwargs: dict,
):
super().__init__(
task=task,
task_messages=task_messages,
openai_client=openai_client,
agent_config=agent_config,
toolkit=toolkit,
Expand Down
4 changes: 2 additions & 2 deletions sgr_agent_core/agents/sgr_tool_calling_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ class SGRToolCallingAgent(BaseAgent):

def __init__(
self,
task: str,
task_messages: list,
openai_client: AsyncOpenAI,
agent_config: AgentConfig,
toolkit: list[Type[BaseTool]],
def_name: str | None = None,
**kwargs: dict,
):
super().__init__(
task=task,
task_messages=task_messages,
openai_client=openai_client,
agent_config=agent_config,
toolkit=toolkit,
Expand Down
4 changes: 2 additions & 2 deletions sgr_agent_core/agents/tool_calling_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,15 @@ class ToolCallingAgent(BaseAgent):

def __init__(
self,
task: str,
task_messages: list,
openai_client: AsyncOpenAI,
agent_config: AgentConfig,
toolkit: list[Type[BaseTool]],
def_name: str | None = None,
**kwargs: dict,
):
super().__init__(
task=task,
task_messages=task_messages,
openai_client=openai_client,
agent_config=agent_config,
toolkit=toolkit,
Expand Down
28 changes: 15 additions & 13 deletions sgr_agent_core/base_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from typing import Type

from openai import AsyncOpenAI, pydantic_function_tool
from openai.types.chat import ChatCompletionFunctionToolParam
from openai.types.chat import ChatCompletionFunctionToolParam, ChatCompletionMessageParam

from sgr_agent_core.agent_definition import AgentConfig
from sgr_agent_core.models import AgentContext, AgentStatesEnum
Expand Down Expand Up @@ -35,7 +35,7 @@ class BaseAgent(AgentRegistryMixin):

def __init__(
self,
task: str,
task_messages: list[ChatCompletionMessageParam],
openai_client: AsyncOpenAI,
agent_config: AgentConfig,
toolkit: list[Type[BaseTool]],
Expand All @@ -46,7 +46,7 @@ def __init__(
self.openai_client = openai_client
self.config = agent_config
self.creation_time = datetime.now()
self.task = task
self.task_messages = task_messages
self.toolkit = toolkit

self._context = AgentContext()
Expand All @@ -56,15 +56,18 @@ def __init__(
self.logger = logging.getLogger(f"sgr_agent_core.agents.{self.id}")
self.log = []

async def provide_clarification(self, clarifications: str):
"""Receive clarification from an external source (e.g. user input)"""
async def provide_clarification(self, messages: list[ChatCompletionMessageParam]):
"""Receive clarification from an external source (e.g. user input) in
OpenAI messages format."""
self.conversation.extend(messages)
self.conversation.append(
{"role": "user", "content": PromptLoader.get_clarification_template(clarifications, self.config.prompts)}
{"role": "user", "content": PromptLoader.get_clarification_template(messages, self.config.prompts)}
)

self._context.clarifications_used += 1
self._context.clarification_received.set()
self._context.state = AgentStatesEnum.RESEARCHING
self.logger.info(f"✅ Clarification received: {clarifications[:2000]}...")
self.logger.info(f"✅ Clarification received: {len(messages)} messages")

def _log_reasoning(self, result: ReasoningTool) -> None:
next_step = result.remaining_steps[0] if result.remaining_steps else "Completing"
Expand Down Expand Up @@ -129,7 +132,7 @@ def _save_agent_log(self):
"model_config": self.config.llm.model_dump(
exclude={"api_key", "proxy"}, mode="json"
), # Sensitive data excluded by default
"task": self.task,
"task_messages": self.task_messages,
"toolkit": [tool.tool_name for tool in self.toolkit],
"log": self.log,
}
Expand All @@ -145,12 +148,11 @@ async def _prepare_context(self) -> list[dict]:
Returns a list of dictionaries OpenAI like format, each
containing a role and content key by default.
"""

return [
{"role": "system", "content": PromptLoader.get_system_prompt(self.toolkit, self.config.prompts)},
{
"role": "user",
"content": PromptLoader.get_initial_user_request(self.task, self.config.prompts),
},
*self.task_messages,
{"role": "user", "content": PromptLoader.get_initial_user_request(self.task_messages, self.config.prompts)},
*self.conversation,
]

Expand Down Expand Up @@ -207,7 +209,7 @@ async def _execution_step(self):
async def execute(
self,
):
self.logger.info(f"🚀 Starting for task: '{self.task}'")
self.logger.info(f"🚀 User provided {len(self.task_messages)} messages.")
try:
while self._context.state not in AgentStatesEnum.FINISH_STATES.value:
self._context.iteration += 1
Expand Down
4 changes: 0 additions & 4 deletions sgr_agent_core/prompts/clarification_response.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1 @@
Current Date: {current_date} (Year-Month-Day ISO format: YYYY-MM-DD HH:MM:SS)

CLARIFICATIONS:

{clarifications}
3 changes: 0 additions & 3 deletions sgr_agent_core/prompts/initial_user_request.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1 @@
Current Date: {current_date} (Year-Month-Day ISO format: YYYY-MM-DD HH:MM:SS)
ORIGINAL USER REQUEST:

{task}
23 changes: 7 additions & 16 deletions sgr_agent_core/server/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ async def get_agent_state(agent_id: str):

return AgentStateResponse(
agent_id=agent.id,
task=agent.task,
task_messages=agent.task_messages,
sources_count=len(agent._context.sources),
**agent._context.model_dump(),
)
Expand All @@ -47,7 +47,7 @@ async def get_agents_list():
agents_list = [
AgentListItem(
agent_id=agent.id,
task=agent.task,
task_messages=agent.task_messages,
state=agent._context.state,
creation_time=agent.creation_time,
)
Expand All @@ -73,23 +73,16 @@ async def get_available_models():
return {"data": models_data, "object": "list"}


def extract_user_content_from_messages(messages):
for message in reversed(messages):
if message.role == "user":
return message.content
raise ValueError("User message not found in messages")


@router.post("/agents/{agent_id}/provide_clarification")
async def provide_clarification(agent_id: str, request: ClarificationRequest):
try:
agent = agents_storage.get(agent_id)
if not agent:
raise HTTPException(status_code=404, detail="Agent not found")

logger.info(f"Providing clarification to agent {agent.id}: {request.clarifications[:100]}...")
logger.info(f"Providing clarification to agent {agent.id}: {len(request.messages)} messages")

await agent.provide_clarification(request.clarifications)
await agent.provide_clarification(request.messages)
return StreamingResponse(
agent.streaming_generator.stream(),
media_type="text/event-stream",
Expand Down Expand Up @@ -126,21 +119,19 @@ async def create_chat_completion(request: ChatCompletionRequest):
):
return await provide_clarification(
agent_id=request.model,
request=ClarificationRequest(clarifications=extract_user_content_from_messages(request.messages)),
request=ClarificationRequest(messages=request.messages.root),
)

try:
task = extract_user_content_from_messages(request.messages)

agent_def = next(filter(lambda ad: ad.name == request.model, AgentFactory.get_definitions_list()), None)
if not agent_def:
raise HTTPException(
status_code=400,
detail=f"Invalid model '{request.model}'. "
f"Available models: {[ad.name for ad in AgentFactory.get_definitions_list()]}",
)
agent = await AgentFactory.create(agent_def, task)
logger.info(f"Created agent '{request.model}' for task: {task[:100]}...")
agent = await AgentFactory.create(agent_def, request.messages.root)
logger.info(f"Created agent '{request.model}' with {len(request.messages)} messages")

agents_storage[agent.id] = agent
_ = asyncio.create_task(agent.execute())
Expand Down
Loading