Skip to content

Commit 738cdf0

Browse files
committed
Tarun PR review / fix test
1 parent 945a203 commit 738cdf0

File tree

2 files changed

+6
-9
lines changed

2 files changed

+6
-9
lines changed

examples/models/llama/model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ def __init__(self, **kwargs):
178178
if checkpoint:
179179
self.model_.checkpoint_dtype = get_checkpoint_dtype(checkpoint)
180180
else:
181-
self.model_.checkpoint_dtype = None
181+
self.model_.checkpoint_dtype = torch.float32
182182

183183
if "int8" in str(checkpoint_path):
184184
print("Using int8 weight-only quantization!")

examples/models/llama/tests/test_export_llama_lib.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# This source code is licensed under the BSD-style license found in the
55
# LICENSE file in the root directory of this source tree.
66

7+
from argparse import Namespace
78
import unittest
89

910
import torch
@@ -32,14 +33,10 @@ def test_has_expected_ops_and_op_counts(self):
3233
# we cannot test quantization args in this way
3334
# since quantization requires promoting meta tensors
3435
# to the cpu device, which requires real weights.
35-
export_args_str = """
36-
--use_sdpa_with_kv_cache
37-
-kv
38-
--verbose
39-
"""
40-
args_list = export_args_str.strip().split()
41-
parser = build_args_parser()
42-
args = parser.parse_args(args_list)
36+
args = Namespace()
37+
args.use_sdpa_with_kv_cache = True
38+
args.use_kv_cache = True
39+
args.verbose = True
4340

4441
builder = _export_llama(args)
4542
graph_module = builder.edge_manager.exported_program().graph_module

0 commit comments

Comments (0)