fix codecov

NicholasTao · NicholasTao · commit 9747c93a8692 · 2025-08-11T10:43:59.000+08:00
Signed-off-by: taoyuxiang <oui.nicholas.tao@gmail.com>
diff --git a/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py b/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
@@ -108,4 +108,46 @@ def test_eagle_correctness(
     model_name: str,
     use_eagle3: bool,
 ):
-    pass
+    '''
+    Compare the outputs of a original LLM and a speculative LLM
+    should be the same when using eagle speculative decoding.
+    '''
+    if not use_eagle3:
+        pytest.skip("Not current support for the test.")
+
+    ref_llm = LLM(model=model_name, max_model_len=2048, enforce_eager=True)
+    ref_outputs = ref_llm.chat(test_prompts, sampling_config)
+    del ref_llm
+
+    spec_model_name = eagle3_model_name() if use_eagle3 else eagle_model_name()
+    spec_llm = LLM(
+        model=model_name,
+        trust_remote_code=True,
+        enable_chunked_prefill=True,
+        max_num_seqs=1,
+        max_num_batched_tokens=2048,
+        gpu_memory_utilization=0.6,
+        speculative_config={
+            "method": "eagle3" if use_eagle3 else "eagle",
+            "model": spec_model_name,
+            "num_speculative_tokens": 2,
+            "max_model_len": 128,
+        },
+        max_model_len=128,
+        enforce_eager=True,
+    )
+    spec_outputs = spec_llm.chat(test_prompts, sampling_config)
+    matches = 0
+    misses = 0
+    for ref_output, spec_output in zip(ref_outputs, spec_outputs):
+        if ref_output.outputs[0].text == spec_output.outputs[0].text:
+            matches += 1
+        else:
+            misses += 1
+            print(f"ref_output: {ref_output.outputs[0].text}")
+            print(f"spec_output: {spec_output.outputs[0].text}")
+
+    # Heuristic: expect at least 66% of the prompts to match exactly
+    # Upon failure, inspect the outputs to check for inaccuracy.
+    assert matches > int(0.66 * len(ref_outputs))
+    del spec_llm
diff --git a/tests/ut/ops/test_rotary_embedding.py b/tests/ut/ops/test_rotary_embedding.py
@@ -4,7 +4,8 @@
 import torch
 
 from tests.ut.base import TestBase
-from vllm_ascend.ops.rotary_embedding import (custom_rotary_embedding_enabled,
+from vllm_ascend.ops.rotary_embedding import (__set_cos_sin_cache,
+                                              custom_rotary_embedding_enabled,
                                               native_rope_deepseek_forward,
                                               rope_forward_oot, rotate_half,
                                               yarn_find_correction_dim,
@@ -312,3 +313,52 @@ def test_scale_greater_than_1(self):
                 expected,
                 places=6,
                 msg=f"Failed for scale={scale}, mscale={mscale}")
+
+
+class MockRotaryEmbedding(torch.nn.Module):
+    """Minimal nn.Module that forwards to __set_cos_sin_cache."""
+
+    def __init__(self, base, rotary_dim, max_position_embeddings):
+        super().__init__()
+        self.base = base
+        self.rotary_dim = rotary_dim
+        self.max_position_embeddings = max_position_embeddings
+
+    def _set_cos_sin_cache(self, seq_len, device, dtype):
+        # A bare ``__name`` inside a class body is name-mangled to
+        # ``_MockRotaryEmbedding__name``; look the helper up by string.
+        return globals()["__set_cos_sin_cache"](self, seq_len, device, dtype)
+
+
+class TestSetCosSinCache(TestBase):
+
+    def test_set_cos_sin_cache_registers_buffers_and_sets_embed(self):
+        """Check the buffers registered by _set_cos_sin_cache."""
+        # NOTE(review): the name says "sets_embed" but nothing checks it yet.
+        base = 10000.0
+        rotary_dim = 4
+        max_pos = 10
+        model = MockRotaryEmbedding(base, rotary_dim, max_pos)
+        # Replace register_buffer so every registration is recorded.
+        model.register_buffer = MagicMock()
+        # The mock defines a plain ``_set_cos_sin_cache`` method; there is no
+        # ``_RotaryEmbedding`` attribute on the instance to go through.
+        model._set_cos_sin_cache(seq_len=8, device="cpu", dtype=torch.float32)
+        # expect three calls: inv_freq, cos, sin
+        assert model.register_buffer.call_count == 3
+        # Key the recorded tensors by buffer name so the checks below do not
+        # depend on registration order.
+        buffers = {
+            c.args[0]: c.args[1]
+            for c in model.register_buffer.call_args_list
+        }
+        assert set(buffers) == {"inv_freq", "cos", "sin"}
+        # verify inv_freq shape
+        assert isinstance(buffers["inv_freq"], torch.Tensor)
+        assert buffers["inv_freq"].shape == (rotary_dim // 2, )
+        # verify cos and sin buffers
+        for name in ("cos", "sin"):
+            table = buffers[name]
+            assert isinstance(table, torch.Tensor)
+            assert table.shape == (max_pos, rotary_dim)
+            assert table.dtype == torch.float32