-
Notifications
You must be signed in to change notification settings - Fork 453
Expand file tree
/
Copy pathtest_moe_context.py
More file actions
54 lines (39 loc) · 1.86 KB
/
test_moe_context.py
File metadata and controls
54 lines (39 loc) · 1.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from unittest.mock import patch
import torch
from compressed_tensors.offload.cache import OffloadCache
from llmcompressor.modeling.moe_context import (
_apply_offloading_to_replacement,
_find_ancestor_with_offload_cache,
)
def test_find_ancestor_with_offload_cache():
    """Verify _find_ancestor_with_offload_cache locates offloaded modules.

    Covers three cases: a module with no cache (returns None), a module
    whose ``_parameters`` is an OffloadCache (returned directly), and a
    container wrapping such a module (the cached child is returned).
    """
    # A plain module keeps its ordinary _parameters dict -> no match.
    plain = torch.nn.Linear(10, 10)
    assert _find_ancestor_with_offload_cache(plain) is None

    # Replacing _parameters with an OffloadCache marks this module.
    cached = torch.nn.Linear(10, 10)
    cached._parameters = OffloadCache()
    assert _find_ancestor_with_offload_cache(cached) is cached

    # Searching from a container finds the cached module inside it.
    container = torch.nn.Sequential(cached)
    assert _find_ancestor_with_offload_cache(container) is cached
@patch("llmcompressor.modeling.moe_context.get_cache_init_kwargs")
@patch("llmcompressor.modeling.moe_context.offload_module")
def test_apply_offloading_to_replacement(mock_offload, mock_get_kwargs):
    """Verify offloading propagates from an original module to its replacement.

    NOTE: ``@patch`` decorators apply bottom-up, so ``mock_offload``
    patches ``offload_module`` and ``mock_get_kwargs`` patches
    ``get_cache_init_kwargs``.
    """
    mock_get_kwargs.return_value = {"device": "cpu"}

    # Source container whose child carries an OffloadCache.
    source = torch.nn.Sequential(torch.nn.Linear(10, 10))
    source[0]._parameters = OffloadCache()

    # Target container starts without any offload state.
    target = torch.nn.Sequential(torch.nn.Linear(10, 10))

    _apply_offloading_to_replacement(source, target)

    # Both helpers must have been invoked to offload the replacement child.
    assert mock_offload.called
    assert mock_get_kwargs.called
def test_apply_offloading_no_cache():
    """Verify the call is a no-op when the original has no OffloadCache."""
    uncached_original = torch.nn.Linear(10, 10)
    fresh_replacement = torch.nn.Linear(10, 10)

    # Expected to return early without raising — nothing to offload.
    _apply_offloading_to_replacement(uncached_original, fresh_replacement)