diff --git a/serving/docs/lmi/user_guides/vllm_user_guide.md b/serving/docs/lmi/user_guides/vllm_user_guide.md
index 81bb76020..501fef5d3 100644
--- a/serving/docs/lmi/user_guides/vllm_user_guide.md
+++ b/serving/docs/lmi/user_guides/vllm_user_guide.md
@@ -182,7 +182,7 @@ These are supported in LMI.
 For example, if you want to enable the `speculative_config`, you can do:
 
 * `option.speculative_config={"model": "meta-llama/Llama3.2-1B-Instruct", "num_speculative_tokens": 5}`
-* `OPTION_SPECULATIVE_CONFIG={"model": "meta-llama/Llama3.2-1B-Instruct", "num_speculative_tokens": 5}`
+* `OPTION_SPECULATIVE_CONFIG='{"model": "meta-llama/Llama-3.2-1B-Instruct", "num_speculative_tokens": 5}'`
 
 
 ## Custom Handlers
diff --git a/tests/integration/llm/client.py b/tests/integration/llm/client.py
index 0d7b0caf2..e02703743 100644
--- a/tests/integration/llm/client.py
+++ b/tests/integration/llm/client.py
@@ -284,10 +284,10 @@ def get_model_name():
         "seq_length": [256],
         "tokenizer": "JackFram/llama-68m"
     },
-    "llama-68m-speculative-eagle": {
+    "llama3-1-8b-speculative-eagle": {
         "batch_size": [1, 4],
         "seq_length": [256],
-        "tokenizer": "JackFram/llama-68m"
+        "tokenizer": "unsloth/Meta-Llama-3.1-8B"
     },
     "llama-7b-unmerged-lora": {
         "batch_size": [3],
diff --git a/tests/integration/llm/prepare.py b/tests/integration/llm/prepare.py
index 9904822fd..33cde8ae6 100644
--- a/tests/integration/llm/prepare.py
+++ b/tests/integration/llm/prepare.py
@@ -366,19 +366,17 @@
     "llama-68m-speculative-medusa": {
         "option.model_id": "s3://djl-llm/llama-68m/",
         "option.task": "text-generation",
-        "option.speculative_model": "abhigoyal/vllm-medusa-llama-68m-random",
-        "option.num_speculative_tokens": 4,
-        "option.use_v2_block_manager": True,
+        "option.speculative_config":
+        '{"method":"medusa","model":"abhigoyal/vllm-medusa-llama-68m-random","num_speculative_tokens":4}',
         "option.tensor_parallel_degree": 1,
         "option.max_rolling_batch_size": 4,
     },
-    "llama-68m-speculative-eagle": {
-        "option.model_id": "s3://djl-llm/llama-68m/",
+    "llama3-1-8b-speculative-eagle": {
+        "option.model_id": "s3://djl-llm/llama-3.1-8b-hf/",
         "option.task": "text-generation",
-        "option.speculative_model": "abhigoyal/vllm-eagle-llama-68m-random",
-        "option.num_speculative_tokens": 4,
-        "option.use_v2_block_manager": True,
-        "option.tensor_parallel_degree": 1,
+        "option.speculative_config":
+        '{"method":"eagle","model":"yuhuili/EAGLE-LLaMA3.1-Instruct-8B","num_speculative_tokens":4}',
+        "option.tensor_parallel_degree": 4,
         "option.max_rolling_batch_size": 4,
     },
     "llama-7b-unmerged-lora": {
diff --git a/tests/integration/tests.py b/tests/integration/tests.py
index 91ed6c9db..fe387ffbe 100644
--- a/tests/integration/tests.py
+++ b/tests/integration/tests.py
@@ -409,11 +409,11 @@ def test_llama_68m_speculative_medusa(self):
 @pytest.mark.gpu_4
 class TestVllm2:
 
-    def test_llama_68m_speculative_eagle(self):
-        with Runner('lmi', 'llama-68m-speculative-eagle') as r:
-            prepare.build_vllm_async_model("llama-68m-speculative-eagle")
+    def test_llama3_1_8b_speculative_eagle(self):
+        with Runner('lmi', 'llama3-1-8b-speculative-eagle') as r:
+            prepare.build_vllm_async_model("llama3-1-8b-speculative-eagle")
             r.launch()
-            client.run("vllm llama-68m-speculative-eagle".split())
+            client.run("vllm llama3-1-8b-speculative-eagle".split())
 
     def test_llama3_1_8b_instruct_tool(self):
         with Runner('lmi', 'llama3-1-8b-instruct-tool') as r: