diff --git a/evalscope/config.py b/evalscope/config.py
index 22bfe72e..d2ea3310 100644
--- a/evalscope/config.py
+++ b/evalscope/config.py
@@ -195,9 +195,10 @@ def __init_default_generation_config(self):
                 'temperature': 1.0,
                 'n': 1,
             }
-        elif self.eval_type == EvalType.SERVICE:
+        elif self.eval_type in (EvalType.SERVICE, EvalType.BASE_MODEL):
             self.generation_config = {
                 'temperature': 0.0,
+                'max_tokens': 512,
             }
         if isinstance(self.generation_config, dict):
             self.generation_config = GenerateConfig.model_validate(self.generation_config)
diff --git a/evalscope/constants.py b/evalscope/constants.py
index b406aa4b..ec224dba 100644
--- a/evalscope/constants.py
+++ b/evalscope/constants.py
@@ -71,6 +71,7 @@ class EvalType:
     MOCK_LLM = 'mock_llm'
     CHECKPOINT = 'llm_ckpt'  # native model checkpoint
     SERVICE = 'openai_api'  # model service
+    BASE_MODEL = 'base_model'  # base model service using completions endpoint
     TEXT2IMAGE = 'text2image'  # image generation service
     IMAGE_EDITING = 'image_editing'  # image editing service
 
diff --git a/evalscope/models/model_apis.py b/evalscope/models/model_apis.py
index 8a95a104..f0ffccd7 100644
--- a/evalscope/models/model_apis.py
+++ b/evalscope/models/model_apis.py
@@ -26,6 +26,13 @@ def server() -> type[ModelAPI]:
     return OpenAICompatibleAPI
 
 
+@register_model_api(name='base_model')
+def base_model() -> type[ModelAPI]:
+    from .openai_compatible import OpenAIBaseModelAPI
+
+    return OpenAIBaseModelAPI
+
+
 @register_model_api(name='llm_ckpt')
 def llm_ckpt() -> type[ModelAPI]:
     check_import('torch', package='torch', raise_error=True, feature_name='llm_ckpt')
diff --git a/evalscope/models/openai_compatible.py b/evalscope/models/openai_compatible.py
index a69ac88f..7bb54e8d 100644
--- a/evalscope/models/openai_compatible.py
+++ b/evalscope/models/openai_compatible.py
@@ -1,6 +1,7 @@
 import os
 from openai import APIStatusError, BadRequestError, OpenAI, PermissionDeniedError, UnprocessableEntityError
 from openai._types import NOT_GIVEN
+from openai.types import Completion
 from openai.types.chat import ChatCompletion
 from typing import Any, Dict, List, Optional, Tuple, Union
 
@@ -11,13 +12,17 @@
 from evalscope.utils.argument_utils import get_supported_params
 from .utils.openai import (
     chat_choices_from_openai,
+    collect_completion_stream_response,
     collect_stream_response,
+    completion_choices_from_openai,
     model_output_from_openai,
+    model_output_from_openai_completion,
     openai_chat_messages,
     openai_chat_tool_choice,
     openai_chat_tools,
     openai_completion_params,
     openai_handle_bad_request,
+    openai_prompt_from_messages,
 )
 
 logger = get_logger()
@@ -50,7 +55,7 @@ def __init__(
         assert self.base_url, f'Base URL for {model_name} not found'
 
         # remove trailing slash from base_url
-        self.base_url = self.base_url.rstrip('/').removesuffix('/chat/completions')
+        self.base_url = self.base_url.rstrip('/').removesuffix('/chat/completions').removesuffix('/completions')
 
         # create http client
         self.client = OpenAI(
@@ -142,3 +147,53 @@ def chat_choices_from_completion(self, completion: ChatCompletion,
     def handle_bad_request(self, ex: APIStatusError) -> Union[ModelOutput, Exception]:
         """Hook for subclasses to do bad request handling"""
         return openai_handle_bad_request(self.model_name, ex)
+
+
+class OpenAIBaseModelAPI(OpenAICompatibleAPI):
+    """OpenAI compatible API that targets the completions endpoint."""
+
+    def generate(
+        self,
+        input: List[ChatMessage],
+        tools: List[ToolInfo],
+        tool_choice: ToolChoice,
+        config: GenerateConfig,
+    ) -> ModelOutput:
+        completion_params = self.completion_params(
+            config=config,
+            tools=False,
+        )
+
+        request = dict(
+            prompt=openai_prompt_from_messages(input),
+            **completion_params,
+        )
+
+        self.validate_completion_request_params(request)
+
+        try:
+            completion = self.client.completions.create(**request)
+            # handle streaming response
+            if not isinstance(completion, Completion):
+                completion = collect_completion_stream_response(completion)
+            response = completion.model_dump()
+            self.on_response(response)
+
+            choices = completion_choices_from_openai(completion)
+            return model_output_from_openai_completion(completion, choices)
+
+        except (BadRequestError, UnprocessableEntityError, PermissionDeniedError) as ex:
+            return self.handle_bad_request(ex)
+
+    def validate_completion_request_params(self, params: Dict[str, Any]):
+        """Validate request params for completions endpoint."""
+        if not hasattr(self, '_valid_completion_params'):
+            self._valid_completion_params = get_supported_params(self.client.completions.create)
+
+        extra_body = params.get('extra_body', {})
+        for key in list(params.keys()):
+            if key not in self._valid_completion_params:
+                extra_body[key] = params.pop(key)
+
+        if extra_body:
+            params['extra_body'] = extra_body
diff --git a/evalscope/models/utils/openai.py b/evalscope/models/utils/openai.py
index 892c3d2d..5244bc86 100644
--- a/evalscope/models/utils/openai.py
+++ b/evalscope/models/utils/openai.py
@@ -4,6 +4,14 @@
 from collections import defaultdict
 from copy import copy
 from openai import APIStatusError, OpenAIError
+from openai.types import Completion
+
+# Compatibility shim: CompletionChoice location differs across openai versions
+try:
+    from openai.types.completion import Choice as CompletionChoice  # type: ignore
+except ImportError:  # pragma: no cover
+    from openai.types.completion_choice import CompletionChoice  # type: ignore
+
 from openai.types.chat import (
     ChatCompletion,
     ChatCompletionAssistantMessageParam,
@@ -157,6 +165,16 @@ def openai_chat_messages(
     return [openai_chat_message(message, system_role) for message in messages]
 
 
+def openai_prompt_from_messages(messages: List[ChatMessage]) -> str:
+    """Flatten chat messages into a simple text prompt for completions API."""
+    parts: List[str] = []
+    for message in messages:
+        role = getattr(message, 'role', 'user')
+        parts.append(f'{role}: {message.text}')
+    parts.append('assistant:')
+    return '\n'.join(parts)
+
+
 def openai_completion_params(model: str, config: GenerateConfig, tools: bool) -> Dict[str, Any]:
     params: Dict[str, Any] = dict(model=model)
     # handle stream option
@@ -505,6 +523,40 @@ def chat_message_assistant_from_openai(
     )
 
 
+def completion_choices_from_openai(response: Completion) -> List[ChatCompletionChoice]:
+    choices = list(response.choices)
+    choices.sort(key=lambda c: c.index)
+    return [
+        ChatCompletionChoice(
+            message=ChatMessageAssistant(content=(choice.text or ''), model=response.model, source='generate'),
+            stop_reason=as_stop_reason(choice.finish_reason),
+            logprobs=None,
+        ) for choice in choices
+    ]
+
+
+def model_output_from_openai_completion(
+    completion: Completion,
+    choices: List[ChatCompletionChoice],
+) -> ModelOutput:
+    return ModelOutput(
+        model=completion.model,
+        choices=choices,
+        usage=(
+            ModelUsage(
+                input_tokens=completion.usage.prompt_tokens,
+                output_tokens=completion.usage.completion_tokens,
+                input_tokens_cache_read=(
+                    completion.usage.prompt_tokens_details.cached_tokens
+                    if completion.usage.prompt_tokens_details is not None else None
+                ),
+                reasoning_tokens=None,
+                total_tokens=completion.usage.total_tokens,
+            ) if completion.usage else None
+        ),
+    )
+
+
 def model_output_from_openai(
     completion: ChatCompletion,
     choices: list[ChatCompletionChoice],
@@ -616,6 +668,42 @@ def _parse_content_with_internal(content: str, ) -> Tuple[str, Optional[JsonValu
     ) if internal_match else (content, None))
 
 
+def collect_completion_stream_response(response_stream: List[Completion]) -> Completion:
+    collected_choices = defaultdict(str)
+    finish_reasons = {}
+    last_chunk: Optional[Completion] = None
+
+    for chunk in response_stream:
+        last_chunk = chunk
+        for choice in chunk.choices:
+            collected_choices[choice.index] += getattr(choice, 'text', '') or ''
+            if choice.finish_reason:
+                finish_reasons[choice.index] = choice.finish_reason
+
+    if last_chunk is None:
+        raise ValueError('Empty completion stream')
+
+    choices: List[CompletionChoice] = []
+    for index, text in collected_choices.items():
+        choices.append(
+            CompletionChoice(
+                finish_reason=finish_reasons.get(index, 'stop'),
+                index=index,
+                logprobs=None,
+                text=text,
+            )
+        )
+
+    return Completion(
+        id=last_chunk.id,
+        choices=choices,
+        created=last_chunk.created,
+        model=last_chunk.model,
+        object=getattr(last_chunk, 'object', 'completion'),
+        usage=getattr(last_chunk, 'usage', None),
+    )
+
+
 def collect_stream_response(response_stream: List[ChatCompletionChunk]) -> ChatCompletion:
     collected_chunks: List[ChatCompletionChunk] = []
     collected_messages = defaultdict(list)
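
Not part of the patch: a minimal usage sketch of the new 'base_model' eval type, assuming evalscope's TaskConfig/run_task entry points and an OpenAI-compatible /v1/completions server; the model name, URL, API key, and dataset below are placeholders.

    # Hypothetical usage sketch (not part of this diff): exercise the new
    # 'base_model' eval type against an OpenAI-compatible completions server.
    # Model name, URL, API key, and dataset are placeholders.
    from evalscope import TaskConfig, run_task
    from evalscope.constants import EvalType

    task = TaskConfig(
        model='my-base-model',                            # served non-chat (base) model
        api_url='http://127.0.0.1:8000/v1/completions',   # '/completions' suffix is stripped by the client
        api_key='EMPTY',
        eval_type=EvalType.BASE_MODEL,                    # routes requests to OpenAIBaseModelAPI
        datasets=['gsm8k'],
        generation_config={'temperature': 0.0, 'max_tokens': 512},  # matches the new defaults
    )
    run_task(task)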