Commit a0bfa5d

Add shared weights flow to llama export script
1 parent 5350547 commit a0bfa5d

File tree: 3 files changed (+46, -37 lines)


backends/mediatek/preprocess.py

Lines changed: 0 additions & 6 deletions
@@ -146,7 +146,6 @@ def preprocess_multimethod(
     infos_dict = collections.defaultdict(list)
     models_dict = collections.defaultdict(list)
     result_dict = collections.defaultdict(list)
-    preprocess_result_list = []
     for method_name, method_results in preprocess_results.items():
         for idx, result in enumerate(method_results):
             shared_blob_key = None
@@ -168,12 +167,7 @@ def preprocess_multimethod(
     data_store_output_dict = dict()
     for key, models in models_dict.items():
         ndm = NamedDataStore()
-        print('------------------')
-        print(key)
-        print('Original DLA sizes: {}'.format([len(model) for model in models]))
         blob, new_models = mtk_neuron.extract_shared_data(models, options='-e union')
-        print('Extracted data size: {}'.format(len(blob)))
-        print('New DLA sizes: {}'.format([len(model) for model in new_models]))
         ndm.add_named_data(key, bytes(blob))
         data_store_output_dict[key] = ndm.get_named_data_store_output()
         models.clear()
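
For context, a minimal sketch of the shared-weights extraction step exercised above, assuming mtk_neuron.extract_shared_data and NamedDataStore behave the way this hunk uses them. The wrapper function name, the NamedDataStore import path, and the handling of new_models are assumptions, not part of the commit:

import mtk_neuron
from executorch.exir._serialize._named_data_store import NamedDataStore  # import path assumed

def extract_shared_blobs(models_dict):
    """Pull weights shared by each group of compiled DLA models into one named blob."""
    data_store_output_dict = {}
    for key, models in models_dict.items():
        ndm = NamedDataStore()
        # Extract the union of data shared across the models in this group;
        # returns the shared blob plus models rewritten to reference it.
        blob, new_models = mtk_neuron.extract_shared_data(models, options="-e union")
        ndm.add_named_data(key, bytes(blob))
        data_store_output_dict[key] = ndm.get_named_data_store_output()
        models[:] = new_models  # keep the slimmed models in place of the originals (assumed)
    return data_store_output_dict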

examples/mediatek/aot_utils/llm_utils/utils.py

Lines changed: 3 additions & 3 deletions
@@ -336,10 +336,10 @@ def generate_mask(
     return combined_mask.copy()
 
 
-def get_dest_path(output_folder, exp_name, shape, chunk_idx):
-    dest_folder_root = output_folder + f"_{shape}"
+def get_dest_path(output_folder, exp_name, shape=None, chunk_idx=0):
+    dest_folder_root = output_folder + f"{f'_{shape}' if shape is not None else ''}"
     os.makedirs(dest_folder_root, exist_ok=True)
-    fname = f"{exp_name}_{shape}_{chunk_idx}.pte"
+    fname = f"{exp_name}{f'_{shape}' if shape is not None else ''}_{chunk_idx}.pte"
     dest_path = os.path.join(dest_folder_root, fname)
 
     return dest_path
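
The revised get_dest_path keeps the old per-shape layout when a shape is passed and falls back to a single chunk-level path when it is not. Two illustrative calls (folder, experiment, and shape strings are hypothetical; the helper also creates the destination folder as a side effect):

get_dest_path("output/llama3", "llama3_8b", "32t512c", 0)
# -> "output/llama3_32t512c/llama3_8b_32t512c_0.pte"  (per-shape file, old behavior)

get_dest_path("output/llama3", "llama3_8b", None, 0)
# -> "output/llama3/llama3_8b_0.pte"                  (single multi-method file, new flow)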

examples/mediatek/model_export_scripts/llama.py

Lines changed: 43 additions & 28 deletions
@@ -42,6 +42,7 @@
     NeuropilotQuantizer,
     Precision,
 )
+from executorch.exir.backend.backend_api import to_backend, MethodProgramsPartitionerSpec
 from executorch.exir.backend.backend_details import CompileSpec
 from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
 from tqdm import tqdm
@@ -331,51 +332,65 @@ def export_to_et_ir(
     prepared_graph(*example_inputs) # dummy calibration
     converted_graph = convert_pt2e(prepared_graph, fold_quantize=False)
 
-    print("Getting ATen Dialect Graph")
+    method_to_edge_program = {}
+    method_to_partitioner = {}
+    edge_compile_config=exir.EdgeCompileConfig(_check_ir_validity=False)
+
+    model_shared_key_name = f'{exp_name}_{chunk_idx}'
+
     # Fixed Shape Export Here
     for shape, ntok_and_cache in export_shapes.items():
-        dest_path = get_dest_path(output_folder, exp_name, shape, chunk_idx)
-        print(f"Exporting Shape {shape} to:\n{dest_path}")
+        model_fname = f'{exp_name}_{shape}_{chunk_idx}'
         example_inputs = model.get_example_inputs(*ntok_and_cache)
+        print(f"Getting ATen Dialect Graph for {exp_name} {shape} chunk {chunk_idx}")
         aten_dialect: exir.ExportedProgram = torch.export.export(
             converted_graph, example_inputs, strict=True
         )
 
-        print("Lowering to Edge Dialect Graph")
-        edge_program: exir.EdgeProgramManager = exir.to_edge(
-            aten_dialect,
-            compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
-        )
+        method_to_edge_program[f'{model_fname}'] = exir.to_edge(aten_dialect).exported_program()
         del aten_dialect
 
-        print("Delegating Edge Program to Neuropilot Backend")
         compile_spec = [
            CompileSpec("gno", struct.pack("3s", b"LTS")),
            CompileSpec("gno-exp", struct.pack("0s", b"")),
            CompileSpec("gno-non-4d-tiling", struct.pack("0s", b"")),
            CompileSpec("ImportForever", struct.pack("?", True)),
+           CompileSpec("ExtractSharedBlobKey", model_shared_key_name.encode()),
         ]
-        partitioner = NeuropilotPartitioner(compile_spec)
-        delegated_program = edge_program.to_backend(partitioner)
-        print("Exported Delegated Program:")
-        print(delegated_program.exported_program())
-        del edge_program
-
-        print("Transforming delegated program to executorch backend")
-        executorch_program = delegated_program.to_executorch(
-            config=exir.ExecutorchBackendConfig(
-                memory_planning_pass=exir.passes.MemoryPlanningPass(
-                    alloc_graph_input=False,
-                    alloc_graph_output=False,
-                ),
-                extract_delegate_segments=True,
-            )
+        method_to_partitioner[f'{model_fname}'] = NeuropilotPartitioner(compile_spec)
+
+    print("Delegating Edge Program to Neuropilot Backend")
+    delegated_program = to_backend(
+        MethodProgramsPartitionerSpec(
+            method_to_edge_program,
+            method_to_partitioner
         )
+    )
 
-        print(f"ET Model Dest: {dest_path}\n")
-        os.makedirs(dest_path.rsplit("/", 1)[0], exist_ok=True)
-        with open(dest_path, "wb") as file:
-            file.write(executorch_program.buffer)
+    edge_manager = exir.EdgeProgramManager(
+        delegated_program,
+        compile_config=edge_compile_config
+    )
+    del delegated_program
+
+    print("Transforming delegated program to executorch backend")
+    executorch_program = edge_manager.to_executorch(
+        config=exir.ExecutorchBackendConfig(
+            memory_planning_pass=exir.passes.MemoryPlanningPass(
+                alloc_graph_input=False,
+                alloc_graph_output=False,
+            ),
+            extract_delegate_segments=True,
+        )
+    )
+    del edge_manager
+    print(f'\n Model Size: {len(executorch_program.buffer)}')
+
+    dest_path = get_dest_path(output_folder, exp_name, None, chunk_idx)
+    print(f"{exp_name} ET Model chunk {chunk_idx} Dest: {dest_path}\n")
+    os.makedirs(dest_path.rsplit("/", 1)[0], exist_ok=True)
+    with open(dest_path, "wb") as file:
+        file.write(executorch_program.buffer)
 
 
 def main():
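
For context, a condensed sketch of the multi-method lowering pattern this hunk adopts, assuming the APIs named in the diff (to_backend with MethodProgramsPartitionerSpec, exir.EdgeProgramManager, NeuropilotPartitioner). The helper name, import paths, and per-method naming below are placeholders rather than the script's exact structure:

import torch
from executorch import exir
from executorch.backends.mediatek import NeuropilotPartitioner  # import path assumed
from executorch.exir.backend.backend_api import (
    MethodProgramsPartitionerSpec,
    to_backend,
)

def lower_all_methods(model, example_inputs_per_method, compile_spec):
    method_to_edge_program = {}
    method_to_partitioner = {}
    for method_name, example_inputs in example_inputs_per_method.items():
        aten = torch.export.export(model, example_inputs, strict=True)
        method_to_edge_program[method_name] = exir.to_edge(aten).exported_program()
        # Each method gets its own partitioner; a shared "ExtractSharedBlobKey"
        # entry in compile_spec marks which methods contribute to one weight blob.
        method_to_partitioner[method_name] = NeuropilotPartitioner(compile_spec)

    # Delegate every method in one call so weights common to all shapes/methods
    # can be extracted once instead of being duplicated per method.
    delegated = to_backend(
        MethodProgramsPartitionerSpec(method_to_edge_program, method_to_partitioner)
    )
    edge_manager = exir.EdgeProgramManager(
        delegated,
        compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
    )
    return edge_manager.to_executorch(
        config=exir.ExecutorchBackendConfig(extract_delegate_segments=True)
    )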
