|
48 | 48 | from mcp_agent.tracing.telemetry import get_tracer, is_otel_serializable, telemetry |
49 | 49 | from mcp_agent.tracing.token_tracking_decorator import track_tokens |
50 | 50 | from mcp_agent.utils.common import ensure_serializable, typed_dict_extras, to_string |
51 | | -from mcp_agent.utils.pydantic_type_serializer import serialize_model, deserialize_model |
| 51 | + |
52 | 52 | from mcp_agent.workflows.llm.augmented_llm import ( |
53 | 53 | AugmentedLLM, |
54 | 54 | ModelT, |
@@ -83,15 +83,6 @@ class RequestCompletionRequest(BaseModel): |
83 | 83 | payload: dict |
84 | 84 |
|
85 | 85 |
|
86 | | -class RequestStructuredCompletionRequest(BaseModel): |
87 | | - config: AnthropicSettings |
88 | | - params: RequestParams |
89 | | - response_model: Type[ModelT] | None = None |
90 | | - serialized_response_model: str | None = None |
91 | | - response_str: str |
92 | | - model: str |
93 | | - |
94 | | - |
95 | 86 | def create_anthropic_instance(settings: AnthropicSettings): |
96 | 87 | """Select and initialise the appropriate anthropic client instance based on settings""" |
97 | 88 | if settings.provider == "bedrock": |
@@ -419,68 +410,86 @@ async def generate_structured( |
419 | 410 | response_model: Type[ModelT], |
420 | 411 | request_params: RequestParams | None = None, |
421 | 412 | ) -> ModelT: |
422 | | - # First we invoke the LLM to generate a string response |
423 | | - # We need to do this in a two-step process because Instructor doesn't |
424 | | - # know how to invoke MCP tools via call_tool, so we'll handle all the |
425 | | - # processing first and then pass the final response through Instructor |
| 413 | + # Use Anthropic's native structured output via a forced tool call carrying JSON input |
| 414 | + import json |
| 415 | + |
426 | 416 | tracer = get_tracer(self.context) |
427 | 417 | with tracer.start_as_current_span( |
428 | 418 | f"{self.__class__.__name__}.{self.name}.generate_structured" |
429 | 419 | ) as span: |
430 | 420 | span.set_attribute(GEN_AI_AGENT_NAME, self.agent.name) |
431 | 421 | self._annotate_span_for_generation_message(span, message) |
432 | 422 |
|
433 | | - response = await self.generate_str( |
434 | | - message=message, |
435 | | - request_params=request_params, |
436 | | - ) |
437 | | - |
438 | 423 | params = self.get_request_params(request_params) |
439 | | - |
440 | 424 | if self.context.tracing_enabled: |
441 | 425 | AugmentedLLM.annotate_span_with_request_params(span, params) |
442 | 426 |
|
443 | | - model = await self.select_model(params) |
444 | | - span.set_attribute(GEN_AI_REQUEST_MODEL, model) |
445 | | - |
446 | | - span.set_attribute("response_model", response_model.__name__) |
447 | | - |
448 | | - serialized_response_model: str | None = None |
449 | | - |
450 | | - if self.executor and self.executor.execution_engine == "temporal": |
451 | | - # Serialize the response model to a string |
452 | | - serialized_response_model = serialize_model(response_model) |
453 | | - |
454 | | - structured_response = await self.executor.execute( |
455 | | - AnthropicCompletionTasks.request_structured_completion_task, |
456 | | - RequestStructuredCompletionRequest( |
457 | | - config=self.context.config.anthropic, |
458 | | - params=params, |
459 | | - response_model=response_model |
460 | | - if not serialized_response_model |
461 | | - else None, |
462 | | - serialized_response_model=serialized_response_model, |
463 | | - response_str=response, |
464 | | - model=model, |
465 | | - ), |
| 427 | + model_name = ( |
| 428 | + await self.select_model(params) or self.default_request_params.model |
466 | 429 | ) |
| 430 | + span.set_attribute(GEN_AI_REQUEST_MODEL, model_name) |
467 | 431 |
|
468 | | - # TODO: saqadri (MAC) - fix request_structured_completion_task to return ensure_serializable |
469 | | - # Convert dict back to the proper model instance if needed |
470 | | - if isinstance(structured_response, dict): |
471 | | - structured_response = response_model.model_validate(structured_response) |
| 432 | + # Convert message(s) to Anthropic format |
| 433 | + messages: List[MessageParam] = [] |
| 434 | + if params.use_history: |
| 435 | + messages.extend(self.history.get()) |
| 436 | + messages.extend( |
| 437 | + AnthropicConverter.convert_mixed_messages_to_anthropic(message) |
| 438 | + ) |
472 | 439 |
|
473 | | - if self.context.tracing_enabled: |
474 | | - try: |
475 | | - span.set_attribute( |
476 | | - "structured_response_json", |
477 | | - structured_response.model_dump_json(), |
478 | | - ) |
479 | | - # pylint: disable=broad-exception-caught |
480 | | - except Exception: |
481 | | - span.set_attribute("unstructured_response", response) |
| 440 | + # Define a single tool that matches the Pydantic schema |
| 441 | + schema = response_model.model_json_schema() |
| 442 | + tools: List[ToolParam] = [ |
| 443 | + { |
| 444 | + "name": "return_structured_output", |
| 445 | + "description": "Return the response in the required JSON format", |
| 446 | + "input_schema": schema, |
| 447 | + } |
| 448 | + ] |
| 449 | + |
| 450 | + args = { |
| 451 | + "model": model_name, |
| 452 | + "messages": messages, |
| 453 | + "system": self.instruction or params.systemPrompt, |
| 454 | + "tools": tools, |
| 455 | + "tool_choice": {"type": "tool", "name": "return_structured_output"}, |
| 456 | + } |
| 457 | + if params.maxTokens is not None: |
| 458 | + args["max_tokens"] = params.maxTokens |
| 459 | + if params.stopSequences: |
| 460 | + args["stop_sequences"] = params.stopSequences |
| 461 | + |
| 462 | + # Call Anthropic directly (one-turn streaming for consistency) |
| 463 | + base_url = None |
| 464 | + if self.context and self.context.config and self.context.config.anthropic: |
| 465 | + base_url = self.context.config.anthropic.base_url |
| 466 | + api_key = self.context.config.anthropic.api_key |
| 467 | + client = AsyncAnthropic(api_key=api_key, base_url=base_url) |
| 468 | + else: |
| 469 | + client = AsyncAnthropic() |
| 470 | + |
| 471 | + async with client: |
| 472 | + async with client.messages.stream(**args) as stream: |
| 473 | + final = await stream.get_final_message() |
| 474 | + |
| 475 | + # Extract tool_use input and validate |
| 476 | + for block in final.content: |
| 477 | + if ( |
| 478 | + getattr(block, "type", None) == "tool_use" |
| 479 | + and getattr(block, "name", "") == "return_structured_output" |
| 480 | + ): |
| 481 | + data = getattr(block, "input", None) |
| 482 | + try: |
| 483 | + if isinstance(data, str): |
| 484 | + return response_model.model_validate(json.loads(data)) |
| 485 | + return response_model.model_validate(data) |
| 486 | + except Exception: |
| 487 | + # Fallthrough to error |
| 488 | + break |
482 | 489 |
|
483 | | - return structured_response |
| 490 | + raise ValueError( |
| 491 | + "Failed to obtain structured output from Anthropic response" |
| 492 | + ) |
484 | 493 |
|
485 | 494 | @classmethod |
486 | 495 | def convert_message_to_message_param( |
@@ -770,44 +779,6 @@ async def request_completion_task( |
770 | 779 | response = ensure_serializable(response) |
771 | 780 | return response |
772 | 781 |
|
773 | | - @staticmethod |
774 | | - @workflow_task |
775 | | - @telemetry.traced() |
776 | | - async def request_structured_completion_task( |
777 | | - request: RequestStructuredCompletionRequest, |
778 | | - ): |
779 | | - """ |
780 | | - Request a structured completion using Instructor's Anthropic API. |
781 | | - """ |
782 | | - import instructor |
783 | | - |
784 | | - if request.response_model: |
785 | | - response_model = request.response_model |
786 | | - elif request.serialized_response_model: |
787 | | - response_model = deserialize_model(request.serialized_response_model) |
788 | | - else: |
789 | | - raise ValueError( |
790 | | - "Either response_model or serialized_response_model must be provided for structured completion." |
791 | | - ) |
792 | | - |
793 | | - # We pass the text through instructor to extract structured data |
794 | | - client = instructor.from_anthropic(create_anthropic_instance(request.config)) |
795 | | - |
796 | | - # Extract structured data from natural language without blocking the loop |
797 | | - loop = asyncio.get_running_loop() |
798 | | - structured_response = await loop.run_in_executor( |
799 | | - None, |
800 | | - functools.partial( |
801 | | - client.chat.completions.create, |
802 | | - model=request.model, |
803 | | - response_model=response_model, |
804 | | - messages=[{"role": "user", "content": request.response_str}], |
805 | | - max_tokens=request.params.maxTokens, |
806 | | - ), |
807 | | - ) |
808 | | - |
809 | | - return structured_response |
810 | | - |
811 | 782 |
|
812 | 783 | class AnthropicMCPTypeConverter(ProviderToMCPConverter[MessageParam, Message]): |
813 | 784 | """ |
|