feat: add base_model eval type and completions support #1069
```diff
@@ -1,6 +1,7 @@
 import os
 from openai import APIStatusError, BadRequestError, OpenAI, PermissionDeniedError, UnprocessableEntityError
 from openai._types import NOT_GIVEN
+from openai.types import Completion
 from openai.types.chat import ChatCompletion
 from typing import Any, Dict, List, Optional, Tuple, Union
@@ -11,13 +12,17 @@
 from evalscope.utils.argument_utils import get_supported_params
 from .utils.openai import (
     chat_choices_from_openai,
+    collect_completion_stream_response,
     collect_stream_response,
+    completion_choices_from_openai,
     model_output_from_openai,
+    model_output_from_openai_completion,
     openai_chat_messages,
     openai_chat_tool_choice,
     openai_chat_tools,
     openai_completion_params,
     openai_handle_bad_request,
+    openai_prompt_from_messages,
 )

 logger = get_logger()
```
```diff
@@ -50,7 +55,7 @@ def __init__(
         assert self.base_url, f'Base URL for {model_name} not found'

         # remove trailing slash from base_url
-        self.base_url = self.base_url.rstrip('/').removesuffix('/chat/completions')
+        self.base_url = self.base_url.rstrip('/').removesuffix('/chat/completions').removesuffix('/completions')

         # create http client
         self.client = OpenAI(
```
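For example, with the extra `removesuffix`, a configured endpoint such as `https://example.com/v1/completions/` (an invented URL) normalizes to the API root before the client is constructed:

```python
base_url = 'https://example.com/v1/completions/'
base_url = base_url.rstrip('/').removesuffix('/chat/completions').removesuffix('/completions')
assert base_url == 'https://example.com/v1'
```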
```diff
@@ -142,3 +147,53 @@ def chat_choices_from_completion(self, completion: ChatCompletion,
     def handle_bad_request(self, ex: APIStatusError) -> Union[ModelOutput, Exception]:
         """Hook for subclasses to do bad request handling"""
         return openai_handle_bad_request(self.model_name, ex)
+
+
+class OpenAIBaseModelAPI(OpenAICompatibleAPI):
+    """OpenAI compatible API that targets the completions endpoint."""
+
+    def generate(
+        self,
+        input: List[ChatMessage],
+        tools: List[ToolInfo],
+        tool_choice: ToolChoice,
+        config: GenerateConfig,
+    ) -> ModelOutput:
+        completion_params = self.completion_params(
+            config=config,
+            tools=False,
+        )
+
+        request = dict(
+            prompt=openai_prompt_from_messages(input),
+            **completion_params,
+        )
+
+        self.validate_completion_request_params(request)
+
+        try:
+            completion = self.client.completions.create(**request)
+            # handle streaming response
+            if not isinstance(completion, Completion):
+                completion = collect_completion_stream_response(completion)
+            response = completion.model_dump()
+            self.on_response(response)
+
+            choices = completion_choices_from_openai(completion)
+            return model_output_from_openai_completion(completion, choices)
+
+        except (BadRequestError, UnprocessableEntityError, PermissionDeniedError) as ex:
+            return self.handle_bad_request(ex)
+
+    def validate_completion_request_params(self, params: Dict[str, Any]):
+        """Validate request params for completions endpoint."""
+        if not hasattr(self, '_valid_completion_params'):
+            self._valid_completion_params = get_supported_params(self.client.completions.create)
+
+        extra_body = params.get('extra_body', {})
+        for key in list(params.keys()):
+            if key not in self._valid_completion_params:
+                extra_body[key] = params.pop(key)
+
+        if extra_body:
+            params['extra_body'] = extra_body
```

Contributor (on lines +190 to +191, the `_valid_completion_params` caching): It's good practice to cache the result of `get_supported_params(self.client.completions.create)` …
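As an illustration of what `validate_completion_request_params` does (assuming, as its usage suggests, that `get_supported_params` returns the named parameters of the installed SDK's `completions.create`), a sampling option the SDK does not accept by name, such as `top_k`, is rerouted into `extra_body`. The values below are made up and `api` stands for an already-constructed `OpenAIBaseModelAPI` instance:

```python
# Hypothetical request dict as built by OpenAIBaseModelAPI.generate().
request = {
    'model': 'my-base-model',
    'prompt': 'user: 2 + 2 =\nassistant:',
    'max_tokens': 8,
    'top_k': 20,  # not a named parameter of completions.create()
}
api.validate_completion_request_params(request)
# Afterwards the unsupported key has been moved under extra_body:
# request == {'model': 'my-base-model', 'prompt': 'user: 2 + 2 =\nassistant:',
#             'max_tokens': 8, 'extra_body': {'top_k': 20}}
```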
The helper functions below are added to the `utils/openai` module that the adapter imports from above:
```diff
@@ -4,6 +4,14 @@
 from collections import defaultdict
 from copy import copy
 from openai import APIStatusError, OpenAIError
+from openai.types import Completion
+
+# Compatibility shim: CompletionChoice location differs across openai versions
+try:
+    from openai.types.completion import Choice as CompletionChoice  # type: ignore
+except ImportError:  # pragma: no cover
+    from openai.types.completion_choice import CompletionChoice  # type: ignore
+
 from openai.types.chat import (
     ChatCompletion,
     ChatCompletionAssistantMessageParam,
```
```diff
@@ -157,6 +165,16 @@ def openai_chat_messages(
     return [openai_chat_message(message, system_role) for message in messages]


+def openai_prompt_from_messages(messages: List[ChatMessage]) -> str:
+    """Flatten chat messages into a simple text prompt for completions API."""
+    parts: List[str] = []
+    for message in messages:
+        role = getattr(message, 'role', 'user')
+        parts.append(f'{role}: {message.text}')
+    parts.append('assistant:')
+    return '\n'.join(parts)
+
+
 def openai_completion_params(model: str, config: GenerateConfig, tools: bool) -> Dict[str, Any]:
     params: Dict[str, Any] = dict(model=model)
     # handle stream option
```

Contributor (on `role = getattr(message, 'role', 'user')`): Using `getattr(message, 'role', 'user')` …
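For a concrete sense of what this flattening produces (the messages here are invented), a system message 'You are a helpful assistant.' followed by a user message 'What is 2 + 2?' becomes:

```python
# Result of openai_prompt_from_messages() for the two messages described above,
# per the implementation in this diff:
prompt = 'system: You are a helpful assistant.\nuser: What is 2 + 2?\nassistant:'
```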
```diff
@@ -505,6 +523,40 @@ def chat_message_assistant_from_openai(
     )


+def completion_choices_from_openai(response: Completion) -> List[ChatCompletionChoice]:
+    choices = list(response.choices)
+    choices.sort(key=lambda c: c.index)
+    return [
+        ChatCompletionChoice(
+            message=ChatMessageAssistant(content=(choice.text or ''), model=response.model, source='generate'),
+            stop_reason=as_stop_reason(choice.finish_reason),
+            logprobs=None,
+        ) for choice in choices
+    ]
+
+
+def model_output_from_openai_completion(
+    completion: Completion,
+    choices: List[ChatCompletionChoice],
+) -> ModelOutput:
+    return ModelOutput(
+        model=completion.model,
+        choices=choices,
+        usage=(
+            ModelUsage(
+                input_tokens=completion.usage.prompt_tokens,
+                output_tokens=completion.usage.completion_tokens,
+                input_tokens_cache_read=(
+                    completion.usage.prompt_tokens_details.cached_tokens
+                    if completion.usage.prompt_tokens_details is not None else None
+                ),
+                reasoning_tokens=None,
+                total_tokens=completion.usage.total_tokens,
+            ) if completion.usage else None
+        ),
+    )
+
+
 def model_output_from_openai(
     completion: ChatCompletion,
     choices: list[ChatCompletionChoice],
```

Contributor (on `logprobs=None`): The `logprobs` …
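The truncated note above attaches to the `logprobs=None` argument; if the concern is that log-probability data returned by the completions endpoint is silently dropped, the raw values are available on the legacy response and could be pulled out along these lines (a sketch only; how they would map onto the project's own logprobs model is not shown in this diff):

```python
from typing import List, Optional, Tuple

from openai.types import Completion


def completion_token_logprobs(completion: Completion, choice_index: int = 0) -> Optional[List[Tuple[str, float]]]:
    """Sketch: collect (token, logprob) pairs from a legacy completions response.

    The legacy endpoint reports logprobs as parallel `tokens` / `token_logprobs`
    lists on each choice (when logprobs was requested); both may be absent.
    """
    choice = completion.choices[choice_index]
    lp = choice.logprobs
    if lp is None or not lp.tokens or not lp.token_logprobs:
        return None
    return list(zip(lp.tokens, lp.token_logprobs))
```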
```diff
@@ -616,6 +668,42 @@ def _parse_content_with_internal(content: str, ) -> Tuple[str, Optional[JsonValu
     ) if internal_match else (content, None))


+def collect_completion_stream_response(response_stream: List[Completion]) -> Completion:
+    collected_choices = defaultdict(str)
+    finish_reasons = {}
+    last_chunk: Optional[Completion] = None
+
+    for chunk in response_stream:
+        last_chunk = chunk
+        for choice in chunk.choices:
+            collected_choices[choice.index] += getattr(choice, 'text', '') or ''
+            if choice.finish_reason:
+                finish_reasons[choice.index] = choice.finish_reason
+
+    if last_chunk is None:
+        raise ValueError('Empty completion stream')
+
+    choices: List[CompletionChoice] = []
+    for index, text in collected_choices.items():
+        choices.append(
+            CompletionChoice(
+                finish_reason=finish_reasons.get(index, 'stop'),
+                index=index,
+                logprobs=None,
+                text=text,
+            )
+        )
+
+    return Completion(
+        id=last_chunk.id,
+        choices=choices,
+        created=last_chunk.created,
+        model=last_chunk.model,
+        object=getattr(last_chunk, 'object', 'completion'),
+        usage=getattr(last_chunk, 'usage', None),
+    )
+
+
 def collect_stream_response(response_stream: List[ChatCompletionChunk]) -> ChatCompletion:
     collected_chunks: List[ChatCompletionChunk] = []
     collected_messages = defaultdict(list)
```

Contributor (on `logprobs=None` in the streaming collector): Similar to the non-streaming case, `logprobs` …

Contributor (on `object=getattr(last_chunk, 'object', 'completion')`): The `object` …
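The second comment above is truncated; a plausible reading, offered purely as an assumption, is that the openai SDK types `Completion.object` as `Literal['text_completion']`, in which case the `'completion'` fallback would fail model validation when the synthetic `Completion` is built. A minimal check of that constraint:

```python
from openai.types import Completion

# Assumption: 'text_completion' is the only value the Completion model accepts
# for its `object` field, so it is the safer fallback when reassembling a stream.
completion = Completion(
    id='cmpl-xyz',            # made-up id
    choices=[],
    created=0,
    model='my-base-model',    # made-up model name
    object='text_completion',
)
print(completion.object)  # -> text_completion
```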
Contributor (on a configuration change not shown in the hunks above): This change introduces a default `max_tokens: 512` for `EvalType.SERVICE` in addition to the new `EvalType.BASE_MODEL`. Previously, `EvalType.SERVICE` did not have a default `max_tokens`, so it would use the model's default. This is a behavioral change that could unexpectedly truncate outputs for existing users of `EvalType.SERVICE`. The pull request description states "Zero breaking changes to existing `server/openai_api` behavior", but this change seems to contradict that. To avoid breaking changes, you might want to apply this default only to `EvalType.BASE_MODEL`.
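One way to address this, sketched under the assumption that the default is applied somewhere in the task/config setup (the names `eval_type` and `generation_config` below are invented, since that hunk is not shown here), would be to scope it to the new eval type only:

```python
# Hypothetical: keep EvalType.SERVICE behaviour unchanged and only give the new
# base-model eval type the 512-token default when the user did not set one.
if eval_type == EvalType.BASE_MODEL and generation_config.get('max_tokens') is None:
    generation_config['max_tokens'] = 512
```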
max_tokens: 512forEvalType.SERVICEin addition to the newEvalType.BASE_MODEL. Previously,EvalType.SERVICEdid not have a defaultmax_tokens, so it would use the model's default. This is a behavioral change that could unexpectedly truncate outputs for existing users ofEvalType.SERVICE. The pull request description states "Zero breaking changes to existingserver/openai_apibehavior", but this change seems to contradict that. To avoid breaking changes, you might want to apply this default only toEvalType.BASE_MODEL.