Skip to content

Commit cf6f1d4

Browse files
Reduce eagle example test memory usage from 28 to 1 GB (#299)
Signed-off-by: Keval Morabia <[email protected]>
1 parent b233ad1 commit cf6f1d4

File tree

3 files changed

+19
-7
lines changed

3 files changed

+19
-7
lines changed

examples/speculative_decoding/launch.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ CMD="accelerate launch $MULTI_GPU --mixed_precision bf16 main.py \
150150
--logging_steps 100 \
151151
--tf32 True \
152152
--data_path $DATA \
153+
--report_to tensorboard \
153154
$SPECULATIVE_ARGS
154155
"
155156

tests/examples/speculative_decoding/test_eagle.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,27 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16+
import json
1617

1718
from _test_utils.examples.run_command import run_example_command
1819

1920

2021
# fmt: off
21-
def test_llama_eagle(tiny_llama_path, num_gpus, tiny_daring_anteater_path, tmp_path):
22+
def test_llama_eagle3(tiny_llama_path, num_gpus, tiny_daring_anteater_path, tmp_path):
23+
# Create an ultra-tiny EAGLE config for testing to reduce memory usage
24+
tiny_eagle_config = {
25+
"max_position_embeddings": 128,
26+
"num_hidden_layers": 1,
27+
"intermediate_size": 64,
28+
"num_attention_heads": 2,
29+
"num_key_value_heads": 2,
30+
}
31+
32+
# Write the tiny config to a temporary file
33+
config_file = tmp_path / "tiny_eagle_config.json"
34+
with open(config_file, "w") as f:
35+
json.dump(tiny_eagle_config, f)
36+
2237
run_example_command(
2338
[
2439
"./launch.sh",
@@ -29,7 +44,9 @@ def test_llama_eagle(tiny_llama_path, num_gpus, tiny_daring_anteater_path, tmp_p
2944
"--do_eval", "False",
3045
"--num_gpu", str(num_gpus),
3146
"--mode", "eagle3",
47+
"--eagle_config", str(config_file),
3248
"--output_dir", tmp_path / "eagle-tinyllama",
49+
"--training_seq_len", "128", # Match max_position_embeddings
3350
],
3451
"speculative_decoding",
3552
)

tests/gpu/torch/export/test_unified_export_megatron.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,12 +110,6 @@ def _test_unified_export_megatron(tmp_path, model_type, arch, algo, rank, size):
110110
],
111111
)
112112
def test_unified_export_megatron(tmp_path, model_type, arch, algo):
113-
if algo == "eagle":
114-
try:
115-
import megatron.core.post_training # noqa: F401
116-
except ImportError:
117-
pytest.skip("megatron.core.post_training not found")
118-
119113
# TODO: Fix TP>1 failures
120114
spawn_multiprocess_job(
121115
size=1, # torch.cuda.device_count(),

0 commit comments

Comments (0)