|
68 | 68 | BuiltinTool, |
69 | 69 | ToolCall, |
70 | 70 | ) |
71 | | -from llama_stack.providers.utils.inference.openai_compat import ( |
72 | | - convert_message_to_openai_dict, |
73 | | - convert_openai_chat_completion_stream, |
74 | | - convert_tooldef_to_openai_tool, |
75 | | -) |
76 | 71 | from llama_stack.providers.utils.kvstore import KVStore |
77 | 72 | from llama_stack.providers.utils.telemetry import tracing |
78 | 73 |
|
@@ -515,60 +510,16 @@ async def _run( |
515 | 510 | async with tracing.span("inference") as span: |
516 | 511 | if self.agent_config.name: |
517 | 512 | span.set_attribute("agent_name", self.agent_config.name) |
518 | | - # Convert messages to OpenAI format |
519 | | - openai_messages = [] |
520 | | - for message in input_messages: |
521 | | - openai_message = await convert_message_to_openai_dict(message) |
522 | | - openai_messages.append(openai_message) |
523 | | - |
524 | | - # Convert tool definitions to OpenAI format |
525 | | - openai_tools = None |
526 | | - if self.tool_defs: |
527 | | - openai_tools = [] |
528 | | - for tool_def in self.tool_defs: |
529 | | - openai_tool = convert_tooldef_to_openai_tool(tool_def) |
530 | | - openai_tools.append(openai_tool) |
531 | | - |
532 | | - # Extract tool_choice from tool_config for OpenAI compatibility |
533 | | - # Note: tool_choice can only be provided when tools are also provided |
534 | | - tool_choice = None |
535 | | - if openai_tools and self.agent_config.tool_config and self.agent_config.tool_config.tool_choice: |
536 | | - tool_choice = ( |
537 | | - self.agent_config.tool_config.tool_choice.value |
538 | | - if hasattr(self.agent_config.tool_config.tool_choice, "value") |
539 | | - else str(self.agent_config.tool_config.tool_choice) |
540 | | - ) |
541 | | - |
542 | | - # Convert sampling params to OpenAI format (temperature, top_p, max_tokens) |
543 | | - temperature = None |
544 | | - top_p = None |
545 | | - max_tokens = None |
546 | | - if sampling_params: |
547 | | - if hasattr(sampling_params.strategy, "temperature"): |
548 | | - temperature = sampling_params.strategy.temperature |
549 | | - if hasattr(sampling_params.strategy, "top_p"): |
550 | | - top_p = sampling_params.strategy.top_p |
551 | | - if sampling_params.max_tokens: |
552 | | - max_tokens = sampling_params.max_tokens |
553 | | - |
554 | | - # Use OpenAI chat completion |
555 | | - openai_stream = await self.inference_api.openai_chat_completion( |
556 | | - model=self.agent_config.model, |
557 | | - messages=openai_messages, |
558 | | - tools=openai_tools if openai_tools else None, |
559 | | - tool_choice=tool_choice, |
560 | | - temperature=temperature, |
561 | | - top_p=top_p, |
562 | | - max_tokens=max_tokens, |
| 513 | + async for chunk in await self.inference_api.chat_completion( |
| 514 | + self.agent_config.model, |
| 515 | + input_messages, |
| 516 | + tools=self.tool_defs, |
| 517 | + tool_prompt_format=self.agent_config.tool_config.tool_prompt_format, |
| 518 | + response_format=self.agent_config.response_format, |
563 | 519 | stream=True, |
564 | | - ) |
565 | | - |
566 | | - # Convert OpenAI stream back to Llama Stack format |
567 | | - response_stream = convert_openai_chat_completion_stream( |
568 | | - openai_stream, enable_incremental_tool_calls=True |
569 | | - ) |
570 | | - |
571 | | - async for chunk in response_stream: |
| 520 | + sampling_params=sampling_params, |
| 521 | + tool_config=self.agent_config.tool_config, |
| 522 | + ): |
572 | 523 | event = chunk.event |
573 | 524 | if event.event_type == ChatCompletionResponseEventType.start: |
574 | 525 | continue |
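
For readability, here is roughly how the inference block reads once this hunk is applied — a sketch reconstructed from the added and context lines above, not a verbatim copy of the file. `input_messages`, `sampling_params`, and the `self.*` attributes all come from the surrounding `_run` method.

```python
# Sketch of the reverted inference block (reconstructed from the diff above).
async with tracing.span("inference") as span:
    if self.agent_config.name:
        span.set_attribute("agent_name", self.agent_config.name)
    # A single native chat_completion call replaces the removed
    # message/tool conversion to OpenAI format and the stream
    # re-conversion back to Llama Stack chunks.
    async for chunk in await self.inference_api.chat_completion(
        self.agent_config.model,
        input_messages,
        tools=self.tool_defs,
        tool_prompt_format=self.agent_config.tool_config.tool_prompt_format,
        response_format=self.agent_config.response_format,
        stream=True,
        sampling_params=sampling_params,
        tool_config=self.agent_config.tool_config,
    ):
        event = chunk.event
        if event.event_type == ChatCompletionResponseEventType.start:
            continue
```

The net effect is that the agent loop passes Llama Stack messages, tool definitions, sampling params, and tool config straight through to `chat_completion` and consumes native chunks, instead of translating everything to OpenAI format and back via the `openai_compat` helpers whose import is also removed above.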
|