Skip to content

Commit bb1912c

Browse files
committed
add tracing tests
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
1 parent 547e68f commit bb1912c

File tree

5 files changed

+47
-15
lines changed

5 files changed

+47
-15
lines changed

.github/workflows/test-check-transformers.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,3 +88,7 @@ jobs:
8888
if: (success() || failure()) && steps.install.outcome == 'success'
8989
run: |
9090
pytest -v tests/llmcompressor/transformers/obcq
91+
- name: Running Tracing Tests
92+
if: (success() || failure()) && steps.install.outcome == 'success'
93+
run: |
94+
pytest -v tests/llmcompressor/transformers/tracing

src/llmcompressor/transformers/finetune/data/peoples_speech.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,20 +26,20 @@ class PeoplesSpeech(TextGenerationDataset):
2626
:param processor: processor or tokenizer to use on dataset
2727
"""
2828

29-
def __init__(self, data_args: "DataArgs", split: str, processor: Processor):
30-
data_args = deepcopy(data_args)
31-
data_args.dataset = "MLCommons/peoples_speech"
32-
data_args.dataset_config_name = "test"
33-
if not data_args.overwrite_cache:
29+
def __init__(self, dataset_args: "DataArgs", split: str, processor: Processor):
30+
dataset_args = deepcopy(dataset_args)
31+
dataset_args.dataset = "MLCommons/peoples_speech"
32+
dataset_args.dataset_config_name = "test"
33+
if not dataset_args.overwrite_cache:
3434
logger.warning(
3535
"Because audio processors are more complex, dataset mapping functions "
3636
"vary with model architecture and their results cannot be cached. "
3737
"Setting overwrite_cache=True"
3838
)
39-
data_args.overwrite_cache = True
39+
dataset_args.overwrite_cache = True
4040
self.processor_type = processor.__class__.__name__
4141

42-
super().__init__(data_args=data_args, split=split, processor=processor)
42+
super().__init__(dataset_args=dataset_args, split=split, processor=processor)
4343

4444
def dataset_template(self, example):
4545
audio = example["audio"]["array"]

src/llmcompressor/transformers/tracing/debug.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
from llmcompressor.transformers import TextGenerationDataset
1313
from llmcompressor.args import DatasetArguments
1414

15+
from llmcompressor.utils.dev import skip_weights_download
16+
1517
__all__ = [
1618
"get_model_class"
1719
]
@@ -24,6 +26,7 @@ def parse_args():
2426
parser.add_argument("--sequential_targets", type=str, nargs="*", default=None, metavar="TARGET", help="List of targets for sequential tracing") # noqa: E501
2527
parser.add_argument("--ignore", type=str, nargs="*", default=[], metavar="PATTERN", help="List of patterns to ignore during tracing") # noqa: E501
2628
parser.add_argument("--modality", type=str, default="text", help="Modality of calibration dataset, defaults to text") # noqa: E501
29+
parser.add_argument("--trust_remote_code", type=bool, default=False, help="Whether to trust model remote code") # noqa: E501
2730
return parser.parse_args()
2831

2932

@@ -33,6 +36,7 @@ def trace(
3336
sequential_targets: Optional[Union[List[str], str]] = None,
3437
ignore: Union[List[str], str] = [],
3538
modality: str = "text",
39+
trust_remote_code: bool = True
3640
):
3741
"""
3842
Debug traceability by tracing a pre-trained model into subgraphs
@@ -44,6 +48,7 @@ def trace(
4448
inference
4549
:param ignore: patterns to ignore during tracing
4650
:param modality: data modality for dummy tracing data, defaults to 'text'
51+
:param trust_remote_code: trust remote model code
4752
4853
Example usage from CLI
4954
llmcompressor.trace \
@@ -54,12 +59,16 @@ def trace(
5459
--modality text
5560
"""
5661
# Load model
57-
model = model_class.from_pretrained(
58-
model_id,
59-
device_map="auto",
60-
torch_dtype="auto",
62+
with skip_weights_download(model_class):
63+
model = model_class.from_pretrained(
64+
model_id,
65+
device_map="cpu",
66+
torch_dtype="auto",
67+
trust_remote_code=trust_remote_code,
68+
)
69+
processor = AutoProcessor.from_pretrained(
70+
model_id, trust_remote_code=trust_remote_code
6171
)
62-
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
6372
print("Loaded model")
6473

6574
# Prepare sample data
@@ -138,6 +147,7 @@ def main():
138147
sequential_targets=args.sequential_targets,
139148
ignore=args.ignore,
140149
modality=args.modality,
150+
trust_remote_code=args.trust_remote_code
141151
)
142152

143153

src/llmcompressor/transformers/tracing/idefics3.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ def __init__(self, config: Idefics3Config):
285285

286286
def forward(
287287
self,
288-
input_ids: torch.LongTensor = None,
288+
input_ids: Optional[torch.LongTensor] = None,
289289
attention_mask: Optional[torch.Tensor] = None,
290290
position_ids: Optional[torch.LongTensor] = None,
291291
past_key_values: Optional[List[torch.FloatTensor]] = None,
@@ -296,6 +296,7 @@ def forward(
296296
use_cache: Optional[bool] = None,
297297
output_attentions: Optional[bool] = None,
298298
output_hidden_states: Optional[bool] = None,
299+
cache_position: Optional[torch.LongTensor] = None,
299300
return_dict: Optional[bool] = None,
300301
) -> Union[Tuple, Idefics3BaseModelOutputWithPast]:
301302
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -394,6 +395,7 @@ def forward(
394395
use_cache=use_cache,
395396
output_attentions=output_attentions,
396397
output_hidden_states=output_hidden_states,
398+
cache_position=cache_position,
397399
return_dict=return_dict,
398400
)
399401

src/llmcompressor/utils/dev.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@
44
import tempfile
55
from typing import Type
66

7+
import torch
78
from huggingface_hub import snapshot_download
89
from safetensors.torch import save_file
910
from transformers import AutoModelForCausalLM, PreTrainedModel
10-
from transformers.modeling_utils import no_init_weights
11+
from transformers.modeling_utils import TORCH_INIT_FUNCTIONS
1112
from transformers.utils import SAFE_WEIGHTS_INDEX_NAME, WEIGHTS_INDEX_NAME
1213

1314
from llmcompressor.utils.helpers import patch_attr
@@ -32,6 +33,7 @@ def skip_weights_download(model_class: Type[PreTrainedModel] = AutoModelForCausa
3233
"*.pth",
3334
SAFE_WEIGHTS_INDEX_NAME,
3435
WEIGHTS_INDEX_NAME,
36+
"*.msgpack",
3537
]
3638

3739
@classmethod
@@ -62,7 +64,21 @@ def patched(cls, *args, **kwargs):
6264

6365
with tempfile.TemporaryDirectory() as tmp_dir, patch_attr(
6466
model_class, "from_pretrained", patched
65-
), no_init_weights(), patch_transformers_logger_level():
67+
), skip_weights_initialize(), patch_transformers_logger_level():
68+
yield
69+
70+
71+
@contextlib.contextmanager
72+
def skip_weights_initialize(use_zeros: bool = False):
73+
def skip(tensor: torch.Tensor, *args, **kwargs) -> torch.Tensor:
74+
if use_zeros:
75+
return tensor.fill_(0)
76+
return tensor
77+
78+
with contextlib.ExitStack() as stack:
79+
for name in TORCH_INIT_FUNCTIONS.keys():
80+
stack.enter_context(patch_attr(torch.nn.init, name, skip))
81+
stack.enter_context(patch_attr(torch.Tensor, name, skip))
6682
yield
6783

6884

0 commit comments

Comments (0)