diff --git a/serving/docker/lmi-container-requirements.txt b/serving/docker/lmi-container-requirements.txt index 09b41ec30..dbb2dabad 100644 --- a/serving/docker/lmi-container-requirements.txt +++ b/serving/docker/lmi-container-requirements.txt @@ -32,7 +32,7 @@ uvloop ninja peft llmcompressor -https://vllm-wheels.s3.us-west-2.amazonaws.com/93103575ce0480f36fc1a3603eb51d9a89f38a00/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl +vllm==0.11.1 xgrammar flashinfer-python==0.5.2 lmcache \ No newline at end of file diff --git a/tests/integration/llm/prepare.py b/tests/integration/llm/prepare.py index 8a81a7000..71ae24bf7 100644 --- a/tests/integration/llm/prepare.py +++ b/tests/integration/llm/prepare.py @@ -162,6 +162,7 @@ '{"method":"eagle","model":"yuhuili/EAGLE-LLaMA3.1-Instruct-8B","num_speculative_tokens":4}', "option.tensor_parallel_degree": 4, "option.max_rolling_batch_size": 4, + "option.enforce_eager": True, }, "llama-7b-unmerged-lora": { "option.model_id": "s3://djl-llm/huggyllama-llama-7b",