Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
)
from vllm.entrypoints.openai.engine.protocol import ErrorResponse
from vllm.logprobs import Logprob
- from vllm.transformers_utils.tokenizer import AnyTokenizer
+ from vllm.tokenizers import TokenizerLike

from djl_python.outputs import Output
from djl_python.async_utils import create_non_stream_output, create_stream_chunk_output
Expand Down Expand Up @@ -101,7 +101,7 @@ def convert_lmi_schema_to_completion_request(

def convert_completion_logprobs_to_tgi_tokens(
completion_logprobs: CompletionLogProbs,
- tokenizer: AnyTokenizer,
+ tokenizer: TokenizerLike,
) -> List[dict]:
token_logprobs = completion_logprobs.token_logprobs
tokens = completion_logprobs.tokens
Expand Down Expand Up @@ -138,7 +138,7 @@ def convert_completion_response_to_lmi_schema(
response: CompletionResponse,
request: CompletionRequest = None,
include_details: bool = False,
- tokenizer: AnyTokenizer = None,
+ tokenizer: TokenizerLike = None,
) -> Output:
primary_choice = response.choices[0]
lmi_response = {"generated_text": primary_choice.text}
Expand Down Expand Up @@ -258,7 +258,7 @@ def convert_completion_chunk_response_to_lmi_schema(
def lmi_with_details_non_stream_output_formatter(
response: CompletionResponse,
request: CompletionRequest = None,
- tokenizer: AnyTokenizer = None,
+ tokenizer: TokenizerLike = None,
) -> Output:
return convert_completion_response_to_lmi_schema(response,
include_details=True,
Expand All @@ -269,7 +269,7 @@ def lmi_with_details_non_stream_output_formatter(
def lmi_non_stream_output_formatter(
response: CompletionResponse,
request: CompletionRequest = None,
- tokenizer: AnyTokenizer = None,
+ tokenizer: TokenizerLike = None,
) -> Output:
return convert_completion_response_to_lmi_schema(response,
include_details=False,
Expand Down
2 changes: 1 addition & 1 deletion serving/docker/lmi-container-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@ sentence-transformers==3.3.1
optimum==1.23.2
llmcompressor==0.9.0.1
mpi4py==4.0.1
- https://djl-ai.s3.us-east-1.amazonaws.com/publish/vllm/vllm-0.15.1-cp312-cp312-linux_x86_64.whl
+ https://djl-ai.s3.us-east-1.amazonaws.com/publish/vllm/vllm-0.16.1.dev6%2Bg369fca226-cp312-cp312-linux_x86_64.whl
lmcache
autoawq
Loading