diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py
index aabe5e3fcbb..372670e4101 100644
--- a/examples/models/llama/export_llama_lib.py
+++ b/examples/models/llama/export_llama_lib.py
@@ -855,9 +855,7 @@ def _to_edge_and_lower_llama_xnnpack(
 
     # TODO: Enable generating ETRecord with XNNPack and to_edge_transform_and_lower().
     if generate_etrecord:
-        raise NotImplementedError(
-            "export_llama does not support XNNPack and generating ETRecord at the moment."
-        )
+        builder_exported.generate_etrecord = True
 
     builder = builder_exported.pt2e_quantize(quantizers).to_edge_transform_and_lower(
         partitioners
@@ -865,6 +863,8 @@ def _to_edge_and_lower_llama_xnnpack(
     if verbose:
         print_delegation_info(builder.edge_manager.exported_program().graph_module)
 
+    # we need builder.export_program
+
     return builder.to_executorch(passes=additional_passes)
 
 
diff --git a/extension/llm/export/builder.py b/extension/llm/export/builder.py
index 6db881c5274..3c8b6b4aa2a 100644
--- a/extension/llm/export/builder.py
+++ b/extension/llm/export/builder.py
@@ -96,6 +96,7 @@ def __init__(
         metadata: Optional[dict] = None,
         dynamic_shapes: Optional[Any] = None,
         save_exported_program: bool = False,
+        generate_etrecord: bool = False,
     ):
         # Store necessary constructor arguments.
         self.model = model
@@ -116,6 +117,7 @@ def __init__(
         self.metadata = metadata
         self.dynamic_shapes = dynamic_shapes
         self.save_exported_program = save_exported_program
+        self.generate_etrecord = generate_etrecord
 
         # Note: treat this as the source of truth for the result of
         # torch.export'ing a model. If the overall ExportedProgram is needed,
@@ -481,6 +483,7 @@ def to_edge_transform_and_lower(
                 partitioner=partitioners,
                 compile_config=edge_config,
                 constant_methods=self.metadata,
+                generate_etrecord=self.generate_etrecord,
             )
         if self.verbose:
             logging.info(f"Exported graph:\n{self.edge_manager.exported_program()}")
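
For context, below is a minimal, self-contained sketch of the pattern this change introduces: the builder accepts a generate_etrecord flag at construction time and forwards it to the lowering call instead of raising. The names ToyEdgeManager and lower_to_edge are hypothetical stand-ins for LLMEdgeManager and exir's to_edge_transform_and_lower(); they are not the ExecuTorch API.

# Hypothetical, minimal sketch of the flag-forwarding pattern in this diff.
# ToyEdgeManager stands in for LLMEdgeManager; lower_to_edge stands in for
# executorch.exir.to_edge_transform_and_lower().


def lower_to_edge(generate_etrecord: bool = False) -> dict:
    # Stand-in for the real lowering call; it just records whether an
    # ETRecord was requested.
    return {"etrecord_requested": generate_etrecord}


class ToyEdgeManager:
    def __init__(self, generate_etrecord: bool = False):
        # Mirrors builder.py: the flag is stored on the instance...
        self.generate_etrecord = generate_etrecord
        self.edge_manager = None

    def to_edge_transform_and_lower(self) -> "ToyEdgeManager":
        # ...and forwarded when lowering, as in the diff above.
        self.edge_manager = lower_to_edge(generate_etrecord=self.generate_etrecord)
        return self


if __name__ == "__main__":
    builder = ToyEdgeManager(generate_etrecord=True)
    print(builder.to_edge_transform_and_lower().edge_manager)
    # -> {'etrecord_requested': True}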