-
Notifications
You must be signed in to change notification settings - Fork 87
fix lmi/vllm virtual envs, update to vllm 0.7.1 #2703
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,16 +21,11 @@ | |
| resolve_chat_template_content_format) | ||
|
|
||
|
|
||
| def is_chat_completions_request(inputs: Dict) -> bool: | ||
| return "messages" in inputs | ||
|
|
||
|
|
||
| def parse_chat_completions_request_vllm( | ||
| input_map: Dict, | ||
| is_rolling_batch: bool, | ||
| rolling_batch, | ||
| tokenizer, | ||
| chat_template: Optional[str] = None, | ||
| configs: Properties = None, | ||
| is_mistral_tokenizer: bool = False, | ||
| ): | ||
|
|
@@ -41,12 +36,6 @@ def parse_chat_completions_request_vllm( | |
| "You must enable rolling batch to use the chat completions format." | ||
| ) | ||
|
|
||
| if not is_mistral_tokenizer and not hasattr(tokenizer, | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. deleted because the vllm utils do this validation for us already |
||
| "apply_chat_template"): | ||
| raise AttributeError( | ||
| f"Cannot provide chat completion for tokenizer: {tokenizer.__class__}, " | ||
| f"please ensure that your tokenizer supports chat templates.") | ||
|
|
||
| tool_parser = rolling_batch.get_tool_parser() | ||
| chat_params = ChatProperties(**input_map) | ||
|
|
||
|
|
@@ -85,16 +74,15 @@ def parse_chat_completions_request_vllm( | |
| if is_mistral_tokenizer: | ||
| text_inputs = apply_mistral_chat_template( | ||
| tokenizer, | ||
| messages=chat_params.messages, | ||
| chat_template=chat_template, | ||
| add_generation_prompt=True, | ||
| chat_params.messages, | ||
| None, | ||
| tools=tool_dicts, | ||
| ) | ||
| else: | ||
| text_inputs = apply_hf_chat_template( | ||
| tokenizer, | ||
| conversation=conversation, | ||
| chat_template=chat_template, | ||
| conversation, | ||
| None, | ||
| add_generation_prompt=True, | ||
| tools=tool_dicts, | ||
| ) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,2 +1,3 @@ | ||
| -r requirements-common.txt | ||
| vllm==0.7.0 | ||
| llmcompressor | ||
| vllm==0.7.1 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,12 +7,6 @@ requirements_file=$2 | |
| # This was copied over from the previous pip install defined in the lmi.Dockerfile, so it's specific to that Dockerfile | ||
| python -m venv --system-site-packages $venv_directory | ||
| venv_pip="${venv_directory}/bin/pip" | ||
| $venv_pip install -r $requirements_file | ||
| $venv_pip install -r $requirements_file || exit 1 | ||
| $venv_pip install https://publish.djl.ai/djl_converter/djl_converter-0.31.0-py3-none-any.whl --no-deps | ||
| git clone https://github.com/neuralmagic/AutoFP8.git | ||
| cd AutoFP8 | ||
| git reset --hard 4b2092c | ||
| $venv_pip install . | ||
| cd .. | ||
| rm -rf AutoFP8 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we not need FP8 installation?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not anymore! We're using llm-compressor now — see #2701. |
||
| $venv_pip cache purge | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
deleted because it's not used