Address Tarun's PR review / fix test

jackzhxng · jackzhxng · commit d3d8e7dda33f · 2025-03-22T15:49:58.000-07:00
diff --git a/examples/models/llama/model.py b/examples/models/llama/model.py
@@ -178,7 +178,7 @@ def __init__(self, **kwargs):
             if checkpoint:
                 self.model_.checkpoint_dtype = get_checkpoint_dtype(checkpoint)
             else:
-                self.model_.checkpoint_dtype = None
+                self.model_.checkpoint_dtype = torch.float32
 
         if "int8" in str(checkpoint_path):
             print("Using int8 weight-only quantization!")
diff --git a/examples/models/llama/tests/test_export_llama_lib.py b/examples/models/llama/tests/test_export_llama_lib.py
@@ -5,17 +5,22 @@
 # LICENSE file in the root directory of this source tree.
 
 import unittest
+from argparse import Namespace
 
 import torch
 
 from executorch.devtools.backend_debug import get_delegation_info
-from executorch.examples.models.llama.export_llama_lib import _export_llama, build_args_parser
+from executorch.examples.models.llama.export_llama_lib import (
+    _export_llama,
+    build_args_parser,
+)
 
 UNWANTED_OPS = [
     "aten_permute_copy_default",
     "aten_transpose_copy_default",
 ]
 
+
 class ExportLlamaLibTest(unittest.TestCase):
     def test_has_expected_ops_and_op_counts(self):
         """
@@ -32,19 +37,14 @@ def test_has_expected_ops_and_op_counts(self):
         # we cannot test quantization args in this way
         # since quantization requires promoting meta tensors
         # to the cpu device, which requires real weights.
-        export_args_str = """
-            --use_sdpa_with_kv_cache
-            -kv
-            --verbose
-        """
-        args_list = export_args_str.strip().split()
-        parser = build_args_parser()
-        args = parser.parse_args(args_list)
+        args = Namespace()
+        args.use_sdpa_with_kv_cache = True
+        args.use_kv_cache = True
+        args.verbose = True
 
         builder = _export_llama(args)
         graph_module = builder.edge_manager.exported_program().graph_module
         delegation_info = get_delegation_info(graph_module)
 
         for op, op_info in delegation_info.delegation_by_operator.items():
             self.assertTrue(op not in UNWANTED_OPS)
-