 verbosity_setting = None
 
 
-EXECUTORCH_LLAMA = "et_llama"
+EXECUTORCH_DEFINED_MODELS = ["stories110m", "llama2", "llama3", "llama3.1", "llama3.2"]
 TORCHTUNE_DEFINED_MODELS = []
 
 
@@ -107,23 +107,18 @@ def verbose_export():
 
 
 def build_model(
-    modelname: str = "model",
+    modelname: str,
     extra_opts: str = "",
     *,
     par_local_output: bool = False,
     resource_pkg_name: str = __name__,
-    modelclass: str = EXECUTORCH_LLAMA,
 ) -> str:
-    """
-    Build the model, used for tests. `modelname` arg just specifies
-    where to find the model resource files.
-    """
     if False:  # par_local_output:
         output_dir_path = "par:."
     else:
         output_dir_path = "."
 
-    argString = f"--modelclass {modelclass} --checkpoint par:{modelname}_ckpt.pt --params par:{modelname}_params.json {extra_opts} --output-dir {output_dir_path}"
+    argString = f"--modelname {modelname} --checkpoint par:model_ckpt.pt --params par:model_params.json {extra_opts} --output-dir {output_dir_path}"
     parser = build_args_parser()
     args = parser.parse_args(shlex.split(argString))
     # pkg_name = resource_pkg_name
@@ -138,10 +133,10 @@ def build_args_parser() -> argparse.ArgumentParser:
     # "-q", "--quantized_ckpt", default=None, help="quantized checkpoint file"
     # )
     parser.add_argument(
-        "--modelclass",
-        default=EXECUTORCH_LLAMA,
-        choices=[EXECUTORCH_LLAMA] + TORCHTUNE_DEFINED_MODELS,
-        help='The Lllama model architecture to use. "et_llama" is a custom Llama architecture defined in ExecuTorch that supports llama2, llama3, llama3_1, llama3_2. All other modelclasses are from TorchTune.',
+        "--model",
+        default="llama3",
+        choices=EXECUTORCH_DEFINED_MODELS + TORCHTUNE_DEFINED_MODELS,
+        help="The Llama model architecture to use. stories110m, llama2, llama3, llama3.1, and llama3.2 use the same underlying LlamaTransformer architecture defined in ExecuTorch. All other models use TorchTune model definitions.",
     )
     parser.add_argument(
         "-E",
@@ -530,7 +525,7 @@ def _prepare_for_llama_export(args) -> LLMEdgeManager:
 
     return (
         _load_llama_model(
-            args.modelclass,
+            args.model,
             checkpoint=checkpoint_path,
             checkpoint_dir=checkpoint_dir,
             params_path=params_path,
@@ -553,7 +548,7 @@ def _prepare_for_llama_export(args) -> LLMEdgeManager:
             args=args,
         )
         .set_output_dir(output_dir_path)
-        .source_transform(_get_source_transforms(dtype_override, args))
+        .source_transform(_get_source_transforms(args.model, dtype_override, args))
     )
 
 
@@ -771,7 +766,7 @@ def _load_llama_model_metadata(
 
 
 def _load_llama_model(
-    modelclass: str = EXECUTORCH_LLAMA,
+    modelname: str = "llama3",
     *,
     checkpoint: Optional[str] = None,
     checkpoint_dir: Optional[str] = None,
@@ -808,15 +803,15 @@ def _load_llama_model(
         f"Loading model with checkpoint={checkpoint}, params={params_path}, use_kv_cache={use_kv_cache}, weight_type={weight_type}"
     )
 
-    if modelclass == EXECUTORCH_LLAMA:
+    if modelname in EXECUTORCH_DEFINED_MODELS:
         module_name = "llama"
         model_class_name = "Llama2Model"  # TODO: Change to "LlamaModel" in examples/models/llama/model.py.
-    elif modelclass in TORCHTUNE_DEFINED_MODELS:
+    elif modelname in TORCHTUNE_DEFINED_MODELS:
         raise NotImplementedError(
             "Torchtune Llama models are not yet supported in ExecuTorch export."
         )
     else:
-        raise ValueError(f"{modelclass} is not a valid Llama model.")
+        raise ValueError(f"{modelname} is not a valid Llama model.")
 
     model, example_inputs, example_kwarg_inputs, _ = EagerModelFactory.create_model(
         module_name,
@@ -863,7 +858,7 @@ def _load_llama_model(
 
     return LLMEdgeManager(
         model=model,
-        modelname=modelclass,
+        modelname=modelname,
         max_seq_len=model.params.max_seq_len,
         dtype=dtype,
         use_kv_cache=use_kv_cache,
@@ -890,7 +885,7 @@ def _load_llama_model(
 
 
 def _get_source_transforms(  # noqa
-    dtype_override: Optional[DType], args
+    modelname: str, dtype_override: Optional[DType], args
 ) -> List[Callable[[torch.nn.Module], torch.nn.Module]]:
     transforms = []
 
@@ -920,8 +915,9 @@ def _get_source_transforms(  # noqa
         ops that is not quantized.
 
         There are cases where this may be a no-op, namely, if all linears are
         quantized in the checkpoint.
         """
+        modelname = f"{modelname}_q"
         transforms.append(
             get_quant_weight_transform(args, dtype_override, verbose_export())
         )
@@ -936,6 +932,7 @@ def _get_source_transforms(  # noqa
         transformations based on the given checkpoint first. In those cases,
         this will be a no-op.
         """
+        modelname = f"{modelname}_e"
         transforms.append(get_quant_embedding_transform(args))
 
     if args.expand_rope_table:
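
For context: after this change, callers select the architecture by name through the new `--model` flag, validated against `EXECUTORCH_DEFINED_MODELS + TORCHTUNE_DEFINED_MODELS`. A minimal sketch of exercising the renamed flag, reusing `build_args_parser` from this file; the checkpoint and params paths are placeholders, not real files:

```python
import shlex

# Parse a hypothetical command line against the updated parser.
parser = build_args_parser()
args = parser.parse_args(
    shlex.split("--model llama3.1 --checkpoint model_ckpt.pt --params model_params.json")
)
assert args.model == "llama3.1"  # accepted: listed in EXECUTORCH_DEFINED_MODELS
```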
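The dispatch in `_load_llama_model` now branches on list membership rather than comparing against a single sentinel. A condensed sketch of that logic, with `resolve_model_class` as a hypothetical helper name (the real code inlines these branches in `_load_llama_model`):

```python
from typing import List, Tuple

EXECUTORCH_DEFINED_MODELS: List[str] = ["stories110m", "llama2", "llama3", "llama3.1", "llama3.2"]
TORCHTUNE_DEFINED_MODELS: List[str] = []

def resolve_model_class(modelname: str) -> Tuple[str, str]:
    """Map a model name to (module_name, model_class_name)."""
    if modelname in EXECUTORCH_DEFINED_MODELS:
        # Every ExecuTorch-defined variant shares the same Llama implementation.
        return "llama", "Llama2Model"
    if modelname in TORCHTUNE_DEFINED_MODELS:
        raise NotImplementedError(
            "Torchtune Llama models are not yet supported in ExecuTorch export."
        )
    raise ValueError(f"{modelname} is not a valid Llama model.")
```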