|
8 | 8 | from argparse import ArgumentParser, BooleanOptionalAction |
9 | 9 |
|
10 | 10 | import torch |
11 | | -from executorch.backends.xnnpack.partition.xnnpack_partitioner import ( |
12 | | - XnnpackDynamicallyQuantizedPartitioner, |
13 | | - # XnnpackFloatingPointPartitioner, |
| 11 | +from executorch.backends.xnnpack.partition.config.xnnpack_config import ( |
| 12 | + ConfigPrecisionType, |
| 13 | +) |
| 14 | +from executorch.backends.xnnpack.partition.xnnpack_partitioner2 import ( |
| 15 | + XnnpackPartitioner, |
14 | 16 | ) |
15 | 17 | from executorch.examples.models.llama2.export_llama_lib import ( |
16 | 18 | build_args_parser, |
|
22 | 24 | from executorch.examples.models.llama2.source_transformation.sdpa import ( |
23 | 25 | replace_sdpa_with_custom_op, |
24 | 26 | ) |
25 | | -from executorch.exir import EdgeCompileConfig, to_edge |
| 27 | +from executorch.exir import EdgeCompileConfig |
| 28 | +from executorch.exir.program._program import _to_edge_transform_and_lower |
26 | 29 |
|
27 | 30 | from executorch.extension.llm.export.builder import DType, LLMEdgeManager |
28 | 31 | from model import LlavaModel |
@@ -201,22 +204,27 @@ def main(): |
201 | 204 |
|
202 | 205 | token_embedding_ep = export_token_embedding(llava, prompt_before_image) |
203 | 206 |
|
204 | | - edge_ep = to_edge( |
| 207 | + lowered_and_edge = _to_edge_transform_and_lower( |
205 | 208 | { |
206 | 209 | "image_encoder": image_encoder_ep, |
207 | 210 | "token_embedding": token_embedding_ep, |
208 | 211 | "text_model": text_model_ep, |
209 | 212 | }, |
| 213 | + partitioner={ |
| 214 | + "image_encoder": [ |
| 215 | + XnnpackPartitioner(config_precisions=ConfigPrecisionType.FP32) |
| 216 | + ], |
| 217 | + "text_model": [ |
| 218 | + XnnpackPartitioner( |
| 219 | + config_precisions=ConfigPrecisionType.DYNAMIC_QUANT, |
| 220 | + per_op_mode=True, |
| 221 | + ) |
| 222 | + ], |
| 223 | + }, |
210 | 224 | compile_config=EdgeCompileConfig(_check_ir_validity=False), |
211 | 225 | ) |
212 | 226 |
|
213 | | - executorch_program = edge_ep.to_backend( |
214 | | - { |
215 | | - # TODO: Fix Xnnpack partitioner issue on image encoder. |
216 | | - # "image_encoder": XnnpackFloatingPointPartitioner(), |
217 | | - "text_model": XnnpackDynamicallyQuantizedPartitioner(), |
218 | | - } |
219 | | - ).to_executorch() |
| 227 | + executorch_program = lowered_and_edge.to_executorch() |
220 | 228 |
|
221 | 229 | with open(args.pte_name, "wb") as f: |
222 | 230 | executorch_program.write_to_file(f) |
|
0 commit comments