diff --git a/backends/apple/coreml/compiler/coreml_preprocess.py b/backends/apple/coreml/compiler/coreml_preprocess.py
index c7828888ee5..d1c41c78937 100644
--- a/backends/apple/coreml/compiler/coreml_preprocess.py
+++ b/backends/apple/coreml/compiler/coreml_preprocess.py
@@ -446,7 +446,7 @@ def preprocess(
         config = cto.coreml.OptimizationConfig(
             global_config=op_linear_quantizer_config,
             # skip embedding
-            op_type_configs={"gather": None},
+            # op_type_configs={"gather": None},
         )
         mlmodel = cto.coreml.linear_quantize_weights(mlmodel, config=config)
 
diff --git a/examples/apple/coreml/executor_runner/main.mm b/examples/apple/coreml/executor_runner/main.mm
index 35608dd092a..6cf9a95c22a 100644
--- a/examples/apple/coreml/executor_runner/main.mm
+++ b/examples/apple/coreml/executor_runner/main.mm
@@ -18,6 +18,9 @@
 #import
 #import
+// #import
+// #import
+
 static inline id check_class(id obj, Class cls) {
     return [obj isKindOfClass:cls] ? obj : nil;
 }
@@ -393,6 +396,17 @@ int main(int argc, char * argv[]) {
 
         dump_etdump_gen(etdump_gen.get(), debug_buffer, args);
 
+        for (size_t i = 0; i < method->outputs_size(); i++) {
+            auto tensor = outputs[i].toTensor();
+            NSLog(@"tensor[%zu] sizes=", i);
+            for (size_t j = 0; j < tensor.sizes().size(); j++) {
+                NSLog(@" %d,", tensor.sizes()[j]);
+            }
+        }
+        // std::cout << executorch::extension::evalue_edge_items(100);
+        // for (int i = 0; i < method->outputs_size(); i++) {
+        //     std::cout << "Output " << i << ": " << outputs[i] << std::endl;
+        // }
         return EXIT_SUCCESS;
     }
 }
diff --git a/examples/apple/coreml/scripts/build_executor_runner.sh b/examples/apple/coreml/scripts/build_executor_runner.sh
index 9d20f289bf6..cf77af3f51c 100755
--- a/examples/apple/coreml/scripts/build_executor_runner.sh
+++ b/examples/apple/coreml/scripts/build_executor_runner.sh
@@ -29,7 +29,7 @@ rm -rf "$CMAKE_BUILD_DIR_PATH"
 # Build executorch
 echo "ExecuTorch: Building executorch"
 cmake "$EXECUTORCH_ROOT_PATH" -B"$CMAKE_BUILD_DIR_PATH" \
--DCMAKE_BUILD_TYPE=Release \
+-DCMAKE_BUILD_TYPE=Debug \
 -DCMAKE_TOOLCHAIN_FILE="$IOS_TOOLCHAIN_PATH" \
 -DPLATFORM=MAC_UNIVERSAL \
 -DDEPLOYMENT_TARGET=13.0 \
diff --git a/examples/apple/coreml/scripts/export.py b/examples/apple/coreml/scripts/export.py
index 53316ea2001..950548809ef 100644
--- a/examples/apple/coreml/scripts/export.py
+++ b/examples/apple/coreml/scripts/export.py
@@ -175,9 +175,12 @@ def main():
     edge_program_manager = exir.to_edge(exir_program_aten)
     edge_copy = copy.deepcopy(edge_program_manager)
     partitioner = CoreMLPartitioner(
-        skip_ops_for_coreml_delegation=None, compile_specs=compile_specs
+        skip_ops_for_coreml_delegation=["linear.bias", "linear.weight"],
+        compile_specs=compile_specs,
     )
+    breakpoint()
     delegated_program_manager = edge_program_manager.to_backend(partitioner)
+    breakpoint()
     exec_program = delegated_program_manager.to_executorch(
         config=exir.ExecutorchBackendConfig(extract_delegate_segments=True)
     )
diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py
index f3822b6866d..7a0545d4ea4 100644
--- a/examples/models/llama/export_llama_lib.py
+++ b/examples/models/llama/export_llama_lib.py
@@ -711,6 +711,7 @@ def _export_llama(modelname, args) -> LLMEdgeManager: # noqa: C901
         logging.info("Generated etrecord.bin")
     else:
         builder = builder_exported_to_edge.to_backend(partitioners)
+        breakpoint()
         if args.num_sharding > 0 and args.qnn:
             from executorch.backends.qualcomm.utils.utils import canonicalize_program
 
@@ -825,6 +826,8 @@ def _load_llama_model(
         output_prune_map_path=output_prune_map_path,
         args=args,
     )
+    model(*example_inputs)
+    breakpoint()
     if dtype_override:
         assert isinstance(
             dtype_override, DType
@@ -970,6 +973,7 @@ def _get_source_transforms( # noqa
         else:
             transforms.append(replace_sdpa_with_simple_sdpa)
             transforms.append(replace_kv_cache_with_coreml_kv_cache)
+            transforms.append(replace_causal_mask)
 
     if args.vulkan:
         transforms.append(replace_with_vulkan_rotary_emb)
diff --git a/examples/models/llama/llama_transformer.py b/examples/models/llama/llama_transformer.py
index 76e8730328b..c1784ee89fc 100644
--- a/examples/models/llama/llama_transformer.py
+++ b/examples/models/llama/llama_transformer.py
@@ -326,6 +326,8 @@ def forward(
         bsz, seqlen, _ = x.shape
 
         # QKV
+        # x.shape = [2048]
+        print("lfq shape: ", x.shape)
         q, k, v = self.wq(x), self.wk(x), self.wv(x)
         # We need view_copy elimination
         q = q.view(bsz, seqlen, self.n_local_heads, self.head_dim)
diff --git a/examples/models/toy_model/model.py b/examples/models/toy_model/model.py
index 9ebe42e6621..0ec828072e1 100644
--- a/examples/models/toy_model/model.py
+++ b/examples/models/toy_model/model.py
@@ -27,7 +27,7 @@ def get_example_inputs(self):
 class LinearModule(torch.nn.Module, EagerModelBase):
     def __init__(self):
         super().__init__()
-        self.linear = torch.nn.Linear(3, 3)
+        self.linear = torch.nn.Linear(768, 768)
 
     def forward(self, arg):
         return self.linear(arg)
@@ -36,7 +36,7 @@ def get_eager_model(self) -> torch.nn.Module:
         return self
 
     def get_example_inputs(self):
-        return (torch.randn(3, 3),)
+        return (torch.randn(1, 768),)
 
 
 class AddModule(torch.nn.Module, EagerModelBase):
diff --git a/exir/backend/utils.py b/exir/backend/utils.py
index fb5e16c6bd0..550324d308e 100644
--- a/exir/backend/utils.py
+++ b/exir/backend/utils.py
@@ -380,7 +380,8 @@ def tag_constant_data(edge_program: ExportedProgram) -> None:
                 )
             # tag the data node with the same tag as the last user
             if len(user_tags) > 0:
-                node.meta["delegation_tag"] = user_tags.pop()
+                breakpoint()
+                node.meta["delegation_tag"] = None # user_tags.pop()
 
 
 def tag_mutated_buffer(edge_program: ExportedProgram) -> None:
diff --git a/exir/program/_program.py b/exir/program/_program.py
index b136d6cead9..c9e1d84e88e 100644
--- a/exir/program/_program.py
+++ b/exir/program/_program.py
@@ -1279,6 +1279,7 @@ def to_backend(
             EdgeProgramManager: A copy of the calling EdgeProgramManager with the specified
             subgraphs lowered.
         """
+        breakpoint()
        new_edge_programs: Dict[str, ExportedProgram] = {}
         if isinstance(partitioner, dict):
             for name, program in self._edge_programs.items():
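
The delegation state that the breakpoint() calls in export.py and _program.py pause to inspect can also be dumped non-interactively. Below is a minimal sketch, not part of the patch: it reuses the 768x768 LinearModule dimensions from the toy-model hunk, and it assumes the CoreMLPartitioner import path and the executorch.devtools.backend_debug.get_delegation_info helper are available in this tree.

# Sketch only: print what the partitioner delegated instead of pausing at
# breakpoint(). Import paths and get_delegation_info are assumptions about
# this tree, not part of the patch above.
import torch

import executorch.exir as exir
from executorch.backends.apple.coreml.partition import CoreMLPartitioner
from executorch.devtools.backend_debug import get_delegation_info


class LinearModule(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # Same dims as the toy-model change in this diff.
        self.linear = torch.nn.Linear(768, 768)

    def forward(self, arg):
        return self.linear(arg)


exir_program_aten = torch.export.export(LinearModule().eval(), (torch.randn(1, 768),))
edge_program_manager = exir.to_edge(exir_program_aten)
delegated = edge_program_manager.to_backend(CoreMLPartitioner())
# Tabulates delegated vs. non-delegated ops, so the effect of
# skip_ops_for_coreml_delegation is visible without a debugger.
print(get_delegation_info(delegated.exported_program().graph_module).get_summary())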