Commit fb1ea7d

Revert to fix conflicts
1 parent fd52664 · commit fb1ea7d

4 files changed, +19 -7 lines

examples/mediatek/executor_runner/llama_runner/LlamaModelChunk.cpp

Lines changed: 12 additions & 3 deletions
@@ -70,9 +70,6 @@ std::string LlamaModelChunk::SelectMethod(
   const size_t curTokenSize = GetModelId();
   for (const auto& methodName : methodNames) {
     const auto matches = utils::extract_substr(methodName, "([0-9]+)t[0-9]+c");
-    if (matches.empty()) {
-      continue;
-    }
     ET_CHECK_MSG(
         matches.size() == 2, "Invalid method name: %s", methodName.c_str());
     // Extract the first match group as token size
@@ -93,6 +90,18 @@ std::string LlamaModelChunk::SelectMethod(
   return {};
 }

+size_t LlamaModelChunk::GetExpectedInputCount() const {
+  const size_t rotEmbInputCount = kRotEmbInputIndexes.size();
+  const size_t cacheInputCount = kCacheInputIndexes.size();
+  return 2 + rotEmbInputCount + cacheInputCount;
+}
+
+size_t LlamaModelChunk::GetExpectedOutputCount() const {
+  const size_t cacheOutputCount = kCacheOutputIndexes.size();
+  return 1 + cacheOutputCount;
+}
+
+
 void LlamaModelChunk::Initialize() {
   LoadModels();
   GetModelIoInfo();
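
The two new helpers make the chunk's expected I/O arity explicit: two fixed inputs (the mask, per kMaskInputIndex, plus presumably the token input) alongside the rotary-embedding and cache inputs, and one fixed output (presumably the logits) alongside the cache outputs. A minimal sketch of how a caller might use them to sanity-check a loaded method; CheckIoCounts and its count arguments are hypothetical names, and public access to the helpers is assumed:

// Hypothetical validation sketch: only GetExpectedInputCount() and
// GetExpectedOutputCount() come from this commit; everything else is assumed.
#include <executorch/runtime/platform/assert.h>  // for ET_CHECK_MSG

void CheckIoCounts(
    const LlamaModelChunk& chunk,
    const size_t numModelInputs,   // e.g. as reported by the loaded method's metadata
    const size_t numModelOutputs) {
  ET_CHECK_MSG(
      numModelInputs == chunk.GetExpectedInputCount(),
      "Unexpected input count: got %zu, expected %zu",
      numModelInputs,
      chunk.GetExpectedInputCount());
  ET_CHECK_MSG(
      numModelOutputs == chunk.GetExpectedOutputCount(),
      "Unexpected output count: got %zu, expected %zu",
      numModelOutputs,
      chunk.GetExpectedOutputCount());
}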

examples/mediatek/executor_runner/llama_runner/LlamaModelChunk.h

Lines changed: 6 additions & 0 deletions
@@ -144,6 +144,12 @@ class LlamaModelChunk : public ModelChunk {
   // Whether shared weights is used
   bool kIsSharedWeightsUsed = false;

+  // Input/Output Indexes
+  const size_t kMaskInputIndex;
+  const std::vector<size_t> kRotEmbInputIndexes;
+  const std::vector<size_t> kCacheInputIndexes;
+  const std::vector<size_t> kCacheOutputIndexes;
+
   // Cache
   TensorShape mCacheShape;
   const LLMType kCacheType;
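
Because the four new index members are const, they can only be populated in the constructor's member-initializer list. A self-contained sketch of that wiring, assuming the indexes are passed in at construction; this stand-in class and its constructor signature are illustrative only, not part of the commit:

// Minimal stand-in to illustrate initializing the const index members added
// in this commit; the real LlamaModelChunk has many more members (e.g. kCacheType).
#include <cstddef>
#include <utility>
#include <vector>

class LlamaModelChunkSketch {
 public:
  LlamaModelChunkSketch(
      size_t maskInputIndex,
      std::vector<size_t> rotEmbInputIndexes,
      std::vector<size_t> cacheInputIndexes,
      std::vector<size_t> cacheOutputIndexes)
      : kMaskInputIndex(maskInputIndex),
        kRotEmbInputIndexes(std::move(rotEmbInputIndexes)),
        kCacheInputIndexes(std::move(cacheInputIndexes)),
        kCacheOutputIndexes(std::move(cacheOutputIndexes)) {}

 private:
  const size_t kMaskInputIndex;
  const std::vector<size_t> kRotEmbInputIndexes;
  const std::vector<size_t> kCacheInputIndexes;
  const std::vector<size_t> kCacheOutputIndexes;
};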

examples/mediatek/executor_runner/llama_runner/LlamaRuntime.cpp

Lines changed: 0 additions & 3 deletions
@@ -58,9 +58,6 @@ void LlamaRuntime::Initialize(
       usePromptModel ? modelOptions.prompt_token_batch_size : 1;
   mTokenBatchSize = initBatchSize;

-  // Enable SWA if window size is not 0
-  const bool enableSWA = (modelOptions.window_size != 0);
-
   // Get effective prompt and gen model paths
   const auto& [prompt_model_paths, gen_model_paths] = [&] {
     if (useSharedWeights) {

examples/mediatek/model_export_scripts/llama.py

Lines changed: 1 addition & 1 deletion
@@ -376,7 +376,7 @@ def export_to_et_ir(
         CompileSpec("gno-exp", b""),
         CompileSpec("gno-non-4d-tiling", b""),
         CompileSpec("ImportForever", struct.pack("?", True)),
-        CompileSpec("platform-config", platform_b),
+        CompileSpec("platform-config", b"mt6989"),
         CompileSpec("ExtractSharedBlobKey", model_shared_key_name.encode()),
     ]
     method_to_partitioner[f"{model_fname}"] = NeuropilotPartitioner(compile_spec)
