Skip to content

Commit 76c85e1

Browse files
mcr229 authored and facebook-github-bot committed
llava use to_edge_transform (#4580)
Summary: Pull Request resolved: #4580. Use XNNPACK's new partitioner (which uses to_edge_transform_and_lower) to lower the Llava model. This avoids some of the errors we were encountering before (linear recomposition), so it should work well. Differential Revision: D60125952 fbshipit-source-id: cfb95b91a79e16618931ed8930dc99197df09e0d
1 parent ad3c6f4 commit 76c85e1

File tree

1 file changed

+20
-12
lines changed

1 file changed

+20
-12
lines changed

examples/models/llava/export_llava.py

Lines changed: 20 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -8,9 +8,11 @@
88
from argparse import ArgumentParser, BooleanOptionalAction
99

1010
import torch
11-
from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
12-
XnnpackDynamicallyQuantizedPartitioner,
13-
# XnnpackFloatingPointPartitioner,
11+
from executorch.backends.xnnpack.partition.config.xnnpack_config import (
12+
ConfigPrecisionType,
13+
)
14+
from executorch.backends.xnnpack.partition.xnnpack_partitioner2 import (
15+
XnnpackPartitioner,
1416
)
1517
from executorch.examples.models.llama2.export_llama_lib import (
1618
build_args_parser,
@@ -22,7 +24,8 @@
2224
from executorch.examples.models.llama2.source_transformation.sdpa import (
2325
replace_sdpa_with_custom_op,
2426
)
25-
from executorch.exir import EdgeCompileConfig, to_edge
27+
from executorch.exir import EdgeCompileConfig
28+
from executorch.exir.program._program import _to_edge_transform_and_lower
2629

2730
from executorch.extension.llm.export.builder import DType, LLMEdgeManager
2831
from model import LlavaModel
@@ -201,22 +204,27 @@ def main():
201204

202205
token_embedding_ep = export_token_embedding(llava, prompt_before_image)
203206

204-
edge_ep = to_edge(
207+
lowered_and_edge = _to_edge_transform_and_lower(
205208
{
206209
"image_encoder": image_encoder_ep,
207210
"token_embedding": token_embedding_ep,
208211
"text_model": text_model_ep,
209212
},
213+
partitioner={
214+
"image_encoder": [
215+
XnnpackPartitioner(config_precisions=ConfigPrecisionType.FP32)
216+
],
217+
"text_model": [
218+
XnnpackPartitioner(
219+
config_precisions=ConfigPrecisionType.DYNAMIC_QUANT,
220+
per_op_mode=True,
221+
)
222+
],
223+
},
210224
compile_config=EdgeCompileConfig(_check_ir_validity=False),
211225
)
212226

213-
executorch_program = edge_ep.to_backend(
214-
{
215-
# TODO: Fix Xnnpack partitioner issue on image encoder.
216-
# "image_encoder": XnnpackFloatingPointPartitioner(),
217-
"text_model": XnnpackDynamicallyQuantizedPartitioner(),
218-
}
219-
).to_executorch()
227+
executorch_program = lowered_and_edge.to_executorch()
220228

221229
with open(args.pte_name, "wb") as f:
222230
executorch_program.write_to_file(f)

0 commit comments

Comments
 (0)