llava use to_edge_transform (#4580)

mcr229 · facebook-github-bot · commit 76c85e100b35 · 2024-08-07T12:54:34.000-07:00
Summary: Pull Request resolved: #4580 Use XNNPACK's new partitioner (which uses to_edge_transform_and_lower) to lower the Llava model, this avoids some of the errors we were encountering before (linear recomposition) So it should work well. Differential Revision: D60125952 fbshipit-source-id: cfb95b91a79e16618931ed8930dc99197df09e0d
diff --git a/examples/models/llava/export_llava.py b/examples/models/llava/export_llava.py
@@ -8,9 +8,11 @@
 from argparse import ArgumentParser, BooleanOptionalAction
 
 import torch
-from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
-    XnnpackDynamicallyQuantizedPartitioner,
-    # XnnpackFloatingPointPartitioner,
+from executorch.backends.xnnpack.partition.config.xnnpack_config import (
+    ConfigPrecisionType,
+)
+from executorch.backends.xnnpack.partition.xnnpack_partitioner2 import (
+    XnnpackPartitioner,
 )
 from executorch.examples.models.llama2.export_llama_lib import (
     build_args_parser,
@@ -22,7 +24,8 @@
 from executorch.examples.models.llama2.source_transformation.sdpa import (
     replace_sdpa_with_custom_op,
 )
-from executorch.exir import EdgeCompileConfig, to_edge
+from executorch.exir import EdgeCompileConfig
+from executorch.exir.program._program import _to_edge_transform_and_lower
 
 from executorch.extension.llm.export.builder import DType, LLMEdgeManager
 from model import LlavaModel
@@ -201,22 +204,27 @@ def main():
 
     token_embedding_ep = export_token_embedding(llava, prompt_before_image)
 
-    edge_ep = to_edge(
+    lowered_and_edge = _to_edge_transform_and_lower(
         {
             "image_encoder": image_encoder_ep,
             "token_embedding": token_embedding_ep,
             "text_model": text_model_ep,
         },
+        partitioner={
+            "image_encoder": [
+                XnnpackPartitioner(config_precisions=ConfigPrecisionType.FP32)
+            ],
+            "text_model": [
+                XnnpackPartitioner(
+                    config_precisions=ConfigPrecisionType.DYNAMIC_QUANT,
+                    per_op_mode=True,
+                )
+            ],
+        },
         compile_config=EdgeCompileConfig(_check_ir_validity=False),
     )
 
-    executorch_program = edge_ep.to_backend(
-        {
-            # TODO: Fix Xnnpack partitioner issue on image encoder.
-            # "image_encoder": XnnpackFloatingPointPartitioner(),
-            "text_model": XnnpackDynamicallyQuantizedPartitioner(),
-        }
-    ).to_executorch()
+    executorch_program = lowered_and_edge.to_executorch()
 
     with open(args.pte_name, "wb") as f:
         executorch_program.write_to_file(f)