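"""Tests for grouping safetensors shards by fused weights and for processing
shard groups under a microscale (NVFP4) quantization scheme."""
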
import pytest
import torch
from compressed_tensors.quantization import QuantizationArgs, QuantizationScheme
from safetensors.torch import save_file

from llmcompressor.entrypoints.model_free.helpers import group_files_by_fused_weights
from llmcompressor.entrypoints.model_free.process import (
    process_file_group_microscale_scheme,
    process_file_microscale_scheme,
)


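# NVFP4-style scheme: 4-bit float weights quantized in groups of 16 with
# FP8 (e4m3) scales.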
def _make_nvfp4_scheme():
    return QuantizationScheme(
        targets=["Linear"],
        weights=QuantizationArgs(
            num_bits=4,
            type="float",
            strategy="tensor_group",
            group_size=16,
            symmetric=True,
            dynamic=False,
            scale_dtype=torch.float8_e4m3fn,
        ),
    )


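# Random half-precision weight tensor with the given shape.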
def _rand_weight(*shape):
    return torch.randn(*shape, dtype=torch.float16)


class TestGroupFilesByFusedWeights:
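    """group_files_by_fused_weights should place safetensors files in the same
    group whenever fused partner weights (q/k/v or gate/up) span multiple
    files, and leave unrelated files in singleton groups."""
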
    def test_no_cross_shard_fused_weights_returns_singletons(self):
        weight_map = {
            "model.layers.0.self_attn.q_proj.weight": "shard-00001.safetensors",
            "model.layers.0.self_attn.k_proj.weight": "shard-00001.safetensors",
            "model.layers.0.self_attn.v_proj.weight": "shard-00001.safetensors",
            "model.layers.0.mlp.gate_proj.weight": "shard-00002.safetensors",
            "model.layers.0.mlp.up_proj.weight": "shard-00002.safetensors",
            "model.layers.0.mlp.down_proj.weight": "shard-00002.safetensors",
        }
        groups = group_files_by_fused_weights(weight_map)
        assert len(groups) == 2
        assert all(len(g) == 1 for g in groups)

    def test_cross_shard_qkv_grouped_together(self):
        weight_map = {
            "model.layers.0.self_attn.q_proj.weight": "shard-00001.safetensors",
            "model.layers.0.self_attn.k_proj.weight": "shard-00002.safetensors",
            "model.layers.0.self_attn.v_proj.weight": "shard-00002.safetensors",
            "model.layers.0.mlp.down_proj.weight": "shard-00001.safetensors",
        }
        groups = group_files_by_fused_weights(weight_map)
        assert len(groups) == 1
        assert sorted(groups[0]) == [
            "shard-00001.safetensors",
            "shard-00002.safetensors",
        ]

    def test_cross_shard_gate_up_grouped_together(self):
        weight_map = {
            "model.layers.0.mlp.gate_proj.weight": "shard-00001.safetensors",
            "model.layers.0.mlp.up_proj.weight": "shard-00002.safetensors",
            "model.layers.0.mlp.down_proj.weight": "shard-00002.safetensors",
        }
        groups = group_files_by_fused_weights(weight_map)
        assert len(groups) == 1
        assert sorted(groups[0]) == [
            "shard-00001.safetensors",
            "shard-00002.safetensors",
        ]

    def test_independent_layers_not_merged(self):
        weight_map = {
            "model.layers.0.self_attn.q_proj.weight": "shard-00001.safetensors",
            "model.layers.0.self_attn.k_proj.weight": "shard-00001.safetensors",
            "model.layers.0.self_attn.v_proj.weight": "shard-00001.safetensors",
            "model.layers.1.self_attn.q_proj.weight": "shard-00002.safetensors",
            "model.layers.1.self_attn.k_proj.weight": "shard-00002.safetensors",
            "model.layers.1.self_attn.v_proj.weight": "shard-00002.safetensors",
        }
        groups = group_files_by_fused_weights(weight_map)
        assert len(groups) == 2
        assert all(len(g) == 1 for g in groups)

    def test_three_way_cross_shard_group(self):
        weight_map = {
            "model.layers.0.self_attn.q_proj.weight": "shard-00001.safetensors",
            "model.layers.0.self_attn.k_proj.weight": "shard-00002.safetensors",
            "model.layers.0.self_attn.v_proj.weight": "shard-00003.safetensors",
        }
        groups = group_files_by_fused_weights(weight_map)
        assert len(groups) == 1
        assert sorted(groups[0]) == [
            "shard-00001.safetensors",
            "shard-00002.safetensors",
            "shard-00003.safetensors",
        ]

    def test_empty_weight_map(self):
        groups = group_files_by_fused_weights({})
        assert groups == []

    def test_single_file_no_fused_weights(self):
        weight_map = {
            "model.embed_tokens.weight": "model.safetensors",
            "lm_head.weight": "model.safetensors",
        }
        groups = group_files_by_fused_weights(weight_map)
        assert len(groups) == 1
        assert groups[0] == ["model.safetensors"]


class TestProcessFileGroupMicroscaleScheme:
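    """Processing a group of shards with process_file_group_microscale_scheme
    should match processing the same tensors from a single merged file with
    process_file_microscale_scheme."""
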
    @pytest.fixture
    def qkv_tensors(self):
        return {
            "model.layers.0.self_attn.q_proj.weight": _rand_weight(32, 32),
            "model.layers.0.self_attn.k_proj.weight": _rand_weight(32, 32),
            "model.layers.0.self_attn.v_proj.weight": _rand_weight(32, 32),
            "model.layers.0.mlp.down_proj.weight": _rand_weight(32, 32),
        }

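    # Write q_proj to its own shard and the remaining tensors to a second
    # shard, so the fused q/k/v weights span two files.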
    def _save_split_shards(self, tmp_path, tensors):
        q_key = "model.layers.0.self_attn.q_proj.weight"
        shard1 = {q_key: tensors[q_key]}
        shard2 = {k: v for k, v in tensors.items() if k != q_key}
        shard1_path = tmp_path / "shard-00001.safetensors"
        shard2_path = tmp_path / "shard-00002.safetensors"
        save_file(shard1, shard1_path)
        save_file(shard2, shard2_path)
        return shard1_path, shard2_path

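    # Write all tensors to a single file, as if the checkpoint were unsharded.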
    def _save_merged_shard(self, tmp_path, tensors):
        merged_path = tmp_path / "merged.safetensors"
        save_file(tensors, merged_path)
        return merged_path

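    # Sharded and merged inputs should yield the same set of output tensor
    # names in the returned weight map.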
    def test_group_processing_produces_same_keys_as_single_shard(
        self, qkv_tensors, tmp_path
    ):
        scheme = _make_nvfp4_scheme()
        split_dir = tmp_path / "split"
        split_dir.mkdir()
        merged_dir = tmp_path / "merged"
        merged_dir.mkdir()
        group_out_dir = tmp_path / "group_out"
        group_out_dir.mkdir()
        merged_out_dir = tmp_path / "merged_out"
        merged_out_dir.mkdir()

        shard1_path, shard2_path = self._save_split_shards(split_dir, qkv_tensors)
        merged_path = self._save_merged_shard(merged_dir, qkv_tensors)

        save_paths = [
            group_out_dir / "shard-00001.safetensors",
            group_out_dir / "shard-00002.safetensors",
        ]
        _, weight_map_group = process_file_group_microscale_scheme(
            file_paths=[shard1_path, shard2_path],
            save_paths=save_paths,
            scheme=scheme,
            ignore=[],
            device="cpu",
        )

        _, weight_map_merged = process_file_microscale_scheme(
            file_path=merged_path,
            save_path=merged_out_dir / "merged.safetensors",
            scheme=scheme,
            ignore=[],
            device="cpu",
        )

        assert set(weight_map_group.keys()) == set(weight_map_merged.keys())

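    # Group processing should keep the original one-output-file-per-input-shard
    # layout on disk.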
    def test_group_processing_preserves_original_sharding(
        self, qkv_tensors, tmp_path
    ):
        scheme = _make_nvfp4_scheme()
        split_dir = tmp_path / "split"
        split_dir.mkdir()
        out_dir = tmp_path / "out"
        out_dir.mkdir()

        shard1_path, shard2_path = self._save_split_shards(split_dir, qkv_tensors)
        save_paths = [
            out_dir / "shard-00001.safetensors",
            out_dir / "shard-00002.safetensors",
        ]
        process_file_group_microscale_scheme(
            file_paths=[shard1_path, shard2_path],
            save_paths=save_paths,
            scheme=scheme,
            ignore=[],
            device="cpu",
        )

        for save_path in save_paths:
            assert save_path.exists()
            assert save_path.stat().st_size > 0

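    # The total byte size reported for the sharded outputs should equal the
    # size reported for the merged output.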
    def test_group_processing_total_size_matches_merged(
        self, qkv_tensors, tmp_path
    ):
        scheme = _make_nvfp4_scheme()
        split_dir = tmp_path / "split"
        split_dir.mkdir()
        merged_dir = tmp_path / "merged"
        merged_dir.mkdir()
        group_out_dir = tmp_path / "group_out"
        group_out_dir.mkdir()
        merged_out_dir = tmp_path / "merged_out"
        merged_out_dir.mkdir()

        shard1_path, shard2_path = self._save_split_shards(split_dir, qkv_tensors)
        merged_path = self._save_merged_shard(merged_dir, qkv_tensors)

        save_paths = [
            group_out_dir / "shard-00001.safetensors",
            group_out_dir / "shard-00002.safetensors",
        ]
        total_size_group, _ = process_file_group_microscale_scheme(
            file_paths=[shard1_path, shard2_path],
            save_paths=save_paths,
            scheme=scheme,
            ignore=[],
            device="cpu",
        )
        total_size_merged, _ = process_file_microscale_scheme(
            file_path=merged_path,
            save_path=merged_out_dir / "merged.safetensors",
            scheme=scheme,
            ignore=[],
            device="cpu",
        )
        assert total_size_group == total_size_merged