Commit 018e574

Fix conflicts
1 parent a427f62 commit 018e574

4 files changed: +7 -19 lines changed

examples/mediatek/executor_runner/llama_runner/LlamaModelChunk.cpp

Lines changed: 3 additions & 12 deletions
@@ -70,6 +70,9 @@ std::string LlamaModelChunk::SelectMethod(
   const size_t curTokenSize = GetModelId();
   for (const auto& methodName : methodNames) {
     const auto matches = utils::extract_substr(methodName, "([0-9]+)t[0-9]+c");
+    if (matches.empty()) {
+      continue;
+    }
     ET_CHECK_MSG(
         matches.size() == 2, "Invalid method name: %s", methodName.c_str());
     // Extract the first match group as token size
@@ -90,18 +93,6 @@
   return {};
 }
 
-size_t LlamaModelChunk::GetExpectedInputCount() const {
-  const size_t rotEmbInputCount = kRotEmbInputIndexes.size();
-  const size_t cacheInputCount = kCacheInputIndexes.size();
-  return 2 + rotEmbInputCount + cacheInputCount;
-}
-
-size_t LlamaModelChunk::GetExpectedOutputCount() const {
-  const size_t cacheOutputCount = kCacheOutputIndexes.size();
-  return 1 + cacheOutputCount;
-}
-
-
 void LlamaModelChunk::Initialize() {
   LoadModels();
   GetModelIoInfo();
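
The new matches.empty() guard lets SelectMethod() skip method names that do not fit the "<tokens>t<cache>c" pattern at all, instead of aborting on the matches.size() == 2 check. The helper utils::extract_substr is not part of this commit; the sketch below uses a hypothetical stand-in, extract_substr_sketch, under the assumption that the real helper returns the full match followed by each capture group, and an empty vector when nothing matches.

// Hypothetical stand-in for utils::extract_substr (assumed behavior only;
// the real implementation is not shown in this commit).
#include <iostream>
#include <regex>
#include <string>
#include <vector>

std::vector<std::string> extract_substr_sketch(
    const std::string& input,
    const std::string& pattern) {
  std::vector<std::string> result;
  std::smatch match;
  if (std::regex_search(input, match, std::regex(pattern))) {
    for (const auto& group : match) {
      result.push_back(group.str());  // [0] full match, [1..] capture groups
    }
  }
  return result;  // empty: the new guard skips this method name
}

int main() {
  // "forward_128t512c" fits "<tokens>t<cache>c" and yields 2 entries
  // (full match + captured token count), satisfying matches.size() == 2.
  // "init" yields 0 entries; previously that tripped ET_CHECK_MSG, now
  // the guard simply moves on to the next method name.
  for (const std::string name : {"forward_128t512c", "init"}) {
    const auto matches = extract_substr_sketch(name, "([0-9]+)t[0-9]+c");
    std::cout << name << ": " << matches.size() << " entries\n";
  }
  return 0;
}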

examples/mediatek/executor_runner/llama_runner/LlamaModelChunk.h

Lines changed: 0 additions & 6 deletions
@@ -143,12 +143,6 @@ class LlamaModelChunk : public ModelChunk {
   // Whether shared weights is used
   bool kIsSharedWeightsUsed = false;
 
-  // Input/Output Indexes
-  const size_t kMaskInputIndex;
-  const std::vector<size_t> kRotEmbInputIndexes;
-  const std::vector<size_t> kCacheInputIndexes;
-  const std::vector<size_t> kCacheOutputIndexes;
-
   // Cache
   TensorShape mCacheShape;
   const LLMType kCacheType;
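
Note: within this commit, the only visible consumers of kRotEmbInputIndexes, kCacheInputIndexes, and kCacheOutputIndexes were the GetExpectedInputCount()/GetExpectedOutputCount() helpers deleted from LlamaModelChunk.cpp above, so the declarations are removed together with kMaskInputIndex.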

examples/mediatek/executor_runner/llama_runner/LlamaRuntime.cpp

Lines changed: 3 additions & 0 deletions
@@ -58,6 +58,9 @@ void LlamaRuntime::Initialize(
       usePromptModel ? modelOptions.prompt_token_batch_size : 1;
   mTokenBatchSize = initBatchSize;
 
+  // Enable SWA if window size is not 0
+  const bool enableSWA = (modelOptions.window_size != 0);
+
   // Get effective prompt and gen model paths
   const auto& [prompt_model_paths, gen_model_paths] = [&] {
     if (useSharedWeights) {
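
This hunk only derives enableSWA from modelOptions.window_size; the code that consumes the flag lies outside the diff. For orientation, here is a generic sliding-window-attention mask sketch under textbook SWA semantics, explicitly not LlamaRuntime's actual masking code: with the flag set, each token attends to at most windowSize trailing positions instead of its full causal history.

// Sketch only (assumed, textbook SWA semantics). With enableSWA false this
// is a plain causal mask; with it true, attention is clipped to a trailing
// window, which is what a nonzero modelOptions.window_size opts into.
#include <cstddef>
#include <vector>

std::vector<std::vector<bool>> BuildCausalMaskSketch(
    std::size_t seqLen,
    bool enableSWA,
    std::size_t windowSize) {
  std::vector<std::vector<bool>> mask(
      seqLen, std::vector<bool>(seqLen, false));
  for (std::size_t i = 0; i < seqLen; ++i) {
    // Lowest key index token i may attend to (i+1 guards unsigned underflow).
    const std::size_t lo =
        (enableSWA && i + 1 > windowSize) ? i + 1 - windowSize : 0;
    for (std::size_t j = lo; j <= i; ++j) {
      mask[i][j] = true;  // token i attends to position j
    }
  }
  return mask;
}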

examples/mediatek/model_export_scripts/llama.py

Lines changed: 1 addition & 1 deletion
@@ -376,7 +376,7 @@ def export_to_et_ir(
         CompileSpec("gno-exp", b""),
         CompileSpec("gno-non-4d-tiling", b""),
         CompileSpec("ImportForever", struct.pack("?", True)),
-        CompileSpec("platform-config", b"mt6989"),
+        CompileSpec("platform-config", platform_b),
         CompileSpec("ExtractSharedBlobKey", model_shared_key_name.encode()),
     ]
     method_to_partitioner[f"{model_fname}"] = NeuropilotPartitioner(compile_spec)
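
Note: platform_b is presumably a bytes value carrying the target platform name, defined earlier in llama.py and not visible in this hunk (for example, something like args.platform.encode()); the change replaces the hardcoded b"mt6989" so the export is no longer pinned to a single MediaTek platform. This reading is an assumption from the diff alone.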
