Skip to content

Commit b6044a0

Browse files
committed
Add a source transform converting TorchTune's FusionEmbedding to nn.Embedding
1 parent e145bd1 commit b6044a0

File tree

2 files changed

+45
-0
lines changed

2 files changed

+45
-0
lines changed

examples/models/llama2/export_llama_lib.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -883,8 +883,12 @@ def _load_llama_model(
883883
def _get_source_transforms( # noqa
884884
modelname: str, dtype_override: Optional[DType], args
885885
) -> List[Callable[[torch.nn.Module], torch.nn.Module]]:
886+
is_torchtune_model = modelname in TORCHTUNE_DEFINED_MODELS
886887
transforms = []
887888

889+
if is_torchtune_model:
890+
transforms.append(replace_fusion_embeddings_with_nn_embedding)
891+
888892
if args.use_spin_quant:
889893
if args.use_spin_quant == "cuda":
890894
from .source_transformation.spin_quant import (
@@ -971,4 +975,6 @@ def _get_source_transforms( # noqa
971975
transforms.append(replace_sdpa_with_simple_sdpa)
972976
transforms.append(replace_kv_cache_with_coreml_kv_cache)
973977

978+
print(f"Performing the following transforms: {[transform.__name__ for transform in transforms]}")
979+
974980
return transforms
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
import logging
8+
9+
import torch
10+
from torchtune.modules.model_fusion._fusion import FusionEmbedding
11+
12+
13+
def _replace_fusion_embeddings_with_nn_embedding(module: torch.nn.Module) -> None:
    """
    Recursively replace TorchTune's FusionEmbedding modules with plain
    nn.Embedding, in place.

    FusionEmbedding keeps the base and fusion vocabularies in two separate
    tables, which is useful for training but bears no effect on inference.
    Replacing it with a single nn.Embedding also avoids torch ops that may
    be missing in some export backends, such as masked_select and
    masked_scatter.

    Args:
        module: Root module to transform; modified in place.
    """
    for name, child in module.named_children():
        if isinstance(child, FusionEmbedding):
            num_embeddings = (
                child.embedding.num_embeddings
                + child.fusion_embedding.num_embeddings
            )
            replacement = torch.nn.Embedding(num_embeddings, child.dim)
            # Preserve the trained weights: FusionEmbedding looks up fusion
            # tokens at indices after the base vocabulary, so concatenating
            # the two tables (base first) matches its lookup semantics.
            # Without this copy, the new embedding would hold random init
            # until (if ever) a checkpoint is re-loaded on top of it.
            with torch.no_grad():
                replacement.weight.copy_(
                    torch.cat(
                        [child.embedding.weight, child.fusion_embedding.weight],
                        dim=0,
                    )
                )
            setattr(module, name, replacement)
        else:
            # Not a FusionEmbedding: descend into its children.
            _replace_fusion_embeddings_with_nn_embedding(child)
34+
35+
def replace_fusion_embeddings_with_nn_embedding(
    module: torch.nn.Module,
) -> torch.nn.Module:
    """Replace every FusionEmbedding inside ``module`` with nn.Embedding.

    The transform is applied in place; the (same) module is returned so the
    function composes with other source transforms.
    """
    logging.info("Replacing fusion embeddings with nn.embeddings.")
    _replace_fusion_embeddings_with_nn_embedding(module)
    return module
39+

0 commit comments

Comments
 (0)