@@ -10,45 +10,80 @@
 import torch
 import torch.export._trace
 from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
-from executorch.exir import EdgeCompileConfig, ExecutorchBackendConfig, to_edge
+from executorch.exir import (
+    EdgeCompileConfig,
+    ExecutorchBackendConfig,
+    to_edge,
+    to_edge_transform_and_lower,
+)
 from torch.nn.attention import SDPBackend
-from transformers import AutoModelForCausalLM
+from transformers import (
+    AutoConfig,
+    AutoImageProcessor,
+    AutoModelForCausalLM,
+    AutoModelForMaskedLM,
+    AutoModelForSemanticSegmentation,
+    AutoTokenizer,
+)
 from transformers.generation.configuration_utils import GenerationConfig
 from transformers.integrations.executorch import convert_and_export_with_cache
 from transformers.modeling_utils import PreTrainedModel
 
+from .task_registry import register_task, task_registry
 
-def main() -> None:
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "-hfm",
-        "--hf_model_repo",
-        required=True,
-        default=None,
-        help="a valid huggingface model repo name",
+
+@register_task("masked_lm")
+def export_masked_lm(args):
+    device = "cpu"
+    max_length = 64
+    attn_implementation = "sdpa"
+
+    config = AutoConfig.from_pretrained(args.hf_model_repo)
+    kwargs = {}
+    if hasattr(config, "use_cache"):
+        kwargs["use_cache"] = True
+
+    print(f"DEBUG: attn_implementation: {attn_implementation}")
+    tokenizer = AutoTokenizer.from_pretrained(args.hf_model_repo)
+    mask_token = tokenizer.mask_token
+    print(f"Mask token: {mask_token}")
+    inputs = tokenizer(
+        f"The goal of life is {mask_token}.",
+        return_tensors="pt",
+        padding="max_length",
+        max_length=max_length,
     )
-    parser.add_argument(
-        "-d",
-        "--dtype",
-        type=str,
-        choices=["float32", "float16", "bfloat16"],
-        default="float32",
-        help="specify the dtype for loading the model",
+
+    model = AutoModelForMaskedLM.from_pretrained(
+        args.hf_model_repo,
+        device_map=device,
+        attn_implementation=attn_implementation,
+        **kwargs,
     )
-    parser.add_argument(
-        "-o",
-        "--output_name",
-        required=False,
-        default=None,
-        help="output name of the exported model",
+    print(f"{model.config}")
+    print(f"{model.generation_config}")
+
+    # Pre-autograd export; eventually this will become torch.export.
+    exported_program = torch.export.export_for_training(
+        model,
+        args=(inputs["input_ids"],),
+        kwargs={"attention_mask": inputs["attention_mask"]},
+        strict=True,
     )
 
-    args = parser.parse_args()
+    return model, to_edge_transform_and_lower(
+        exported_program,
+        partitioner=[XnnpackPartitioner()],
+        compile_config=EdgeCompileConfig(
+            _skip_dim_order=True,
+        ),
+    ).to_executorch(config=ExecutorchBackendConfig(extract_delegate_segments=False))
 
-    # Configs to HF model
+
+
+@register_task("causal_lm")
+def export_causal_lm(args):
     device = "cpu"
-    # TODO: remove getattr once https://github.com/huggingface/transformers/pull/33741 is merged
-    dtype = getattr(torch, args.dtype)
+    dtype = args.dtype
     batch_size = 1
     max_length = 123
     cache_implementation = "static"
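
The first hunk's new import, `from .task_registry import register_task, task_registry`, refers to a sibling module that is not shown in this diff. As a reading aid, here is a minimal sketch of what such a module could contain, assuming a plain dict keyed by task name; only the two imported names come from the diff, the body is an assumption:

```python
# task_registry.py (hypothetical sketch; the real module is not part of this diff)
from typing import Callable, Dict

# Maps a task name such as "causal_lm" to the function that exports it.
task_registry: Dict[str, Callable] = {}


def register_task(name: str):
    """Register an export function under `name` via decoration."""

    def decorator(fn: Callable) -> Callable:
        task_registry[name] = fn
        return fn

    return decorator
```

Each `@register_task("...")` in the diff then adds its function to `task_registry` at import time, which is what lets `main()` dispatch on `--task`.
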
@@ -106,11 +141,87 @@ def _get_constant_methods(model: PreTrainedModel): |
         .to_backend(XnnpackPartitioner())
         .to_executorch(ExecutorchBackendConfig(extract_delegate_segments=True))
     )
-    out_name = args.output_name if args.output_name else model.config.model_type
-    filename = os.path.join("./", f"{out_name}.pte")
-    with open(filename, "wb") as f:
-        prog.write_to_file(f)
-    print(f"Saved exported program to {filename}")
+
+    return model, prog
+
+
+@register_task("semantic_segmentation")
+def export_semantic_segmentation(args):
+    import requests
+    from PIL import Image
+
+    device = "cpu"
+    model = AutoModelForSemanticSegmentation.from_pretrained(
+        args.hf_model_repo,
+        device_map=device,
+    )
+    image_processor = AutoImageProcessor.from_pretrained(
+        args.hf_model_repo,
+        device_map=device,
+    )
+    image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/coco_sample.png"
+    image = Image.open(requests.get(image_url, stream=True).raw)
+    inputs = image_processor(images=image, return_tensors="pt")
+
+    exported_program = torch.export.export_for_training(
+        model,
+        args=(inputs["pixel_values"],),
+        strict=True,
+    )
+
+    return model, to_edge_transform_and_lower(
+        exported_program,
+        partitioner=[XnnpackPartitioner()],
+        compile_config=EdgeCompileConfig(
+            _skip_dim_order=True,
+        ),
+    ).to_executorch(config=ExecutorchBackendConfig(extract_delegate_segments=False))
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-hfm",
+        "--hf_model_repo",
+        required=True,
+        default=None,
+        help="a valid huggingface model repo name",
+    )
+    parser.add_argument(
+        "-d",
+        "--dtype",
+        type=str,
+        choices=["float32", "float16", "bfloat16"],
+        default="float32",
+        help="specify the dtype for loading the model",
+    )
+    parser.add_argument(
+        "-o",
+        "--output_name",
+        required=False,
+        default=None,
+        help="output name of the exported model",
+    )
+    parser.add_argument(
+        "-t",
+        "--task",
+        type=str,
+        choices=list(task_registry.keys()),
+        default="causal_lm",
+        help=f"type of task of the model to load from huggingface. supported tasks: {list(task_registry.keys())}",
+    )
+
+    args = parser.parse_args()
+    try:
+        model, prog = task_registry[args.task](args)
+    except KeyError:
+        raise ValueError(f"Unsupported task type {args.task}")
+
+    out_name = args.output_name if args.output_name else model.config.model_type
+    filename = os.path.join("./", f"{out_name}.pte")
+    with open(filename, "wb") as f:
+        prog.write_to_file(f)
+    print(f"Saved exported program to {filename}")
 
 
 if __name__ == "__main__":
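
With the tasks registered, every export flows through the same entry point. Assuming the script is saved as `export_hf_model.py` (the filename is not visible in this excerpt), a masked-LM export might look like:

```
python export_hf_model.py -hfm google-bert/bert-base-uncased -t masked_lm -o bert
```

Per `main()`, this writes `bert.pte` to the current working directory; omitting `-o` would fall back to `model.config.model_type` for the file name.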