Commit 018e574

Fix conflicts
1 parent a427f62 commit 018e574

4 files changed: +7 -19 lines changed

examples/mediatek/executor_runner/llama_runner/LlamaModelChunk.cpp

Lines changed: 3 additions & 12 deletions
@@ -70,6 +70,9 @@ std::string LlamaModelChunk::SelectMethod(
   const size_t curTokenSize = GetModelId();
   for (const auto& methodName : methodNames) {
     const auto matches = utils::extract_substr(methodName, "([0-9]+)t[0-9]+c");
+    if (matches.empty()) {
+      continue;
+    }
     ET_CHECK_MSG(
         matches.size() == 2, "Invalid method name: %s", methodName.c_str());
     // Extract the first match group as token size
@@ -90,18 +93,6 @@
   return {};
 }
 
-size_t LlamaModelChunk::GetExpectedInputCount() const {
-  const size_t rotEmbInputCount = kRotEmbInputIndexes.size();
-  const size_t cacheInputCount = kCacheInputIndexes.size();
-  return 2 + rotEmbInputCount + cacheInputCount;
-}
-
-size_t LlamaModelChunk::GetExpectedOutputCount() const {
-  const size_t cacheOutputCount = kCacheOutputIndexes.size();
-  return 1 + cacheOutputCount;
-}
-
-
 void LlamaModelChunk::Initialize() {
   LoadModels();
   GetModelIoInfo();
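
The new matches.empty() guard lets SelectMethod() skip method names that do not fit the "<tokens>t<cache>c" pattern at all, instead of aborting on the matches.size() == 2 check. The helper utils::extract_substr is not part of this commit; the sketch below uses a hypothetical stand-in, extract_substr_sketch, under the assumption that the real helper returns the full match followed by each capture group, and an empty vector when nothing matches.

// Hypothetical stand-in for utils::extract_substr (assumed behavior only;
// the real implementation is not shown in this commit).
#include <iostream>
#include <regex>
#include <string>
#include <vector>

std::vector<std::string> extract_substr_sketch(
    const std::string& input,
    const std::string& pattern) {
  std::vector<std::string> result;
  std::smatch match;
  if (std::regex_search(input, match, std::regex(pattern))) {
    for (const auto& group : match) {
      result.push_back(group.str());  // [0] full match, [1..] capture groups
    }
  }
  return result;  // empty: the new guard skips this method name
}

int main() {
  // "forward_128t512c" fits "<tokens>t<cache>c" and yields 2 entries
  // (full match + captured token count), satisfying matches.size() == 2.
  // "init" yields 0 entries; previously that tripped ET_CHECK_MSG, now
  // the guard simply moves on to the next method name.
  for (const std::string name : {"forward_128t512c", "init"}) {
    const auto matches = extract_substr_sketch(name, "([0-9]+)t[0-9]+c");
    std::cout << name << ": " << matches.size() << " entries\n";
  }
  return 0;
}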

examples/mediatek/executor_runner/llama_runner/LlamaModelChunk.h

Lines changed: 0 additions & 6 deletions
@@ -143,12 +143,6 @@ class LlamaModelChunk : public ModelChunk {
   // Whether shared weights is used
   bool kIsSharedWeightsUsed = false;
 
-  // Input/Output Indexes
-  const size_t kMaskInputIndex;
-  const std::vector<size_t> kRotEmbInputIndexes;
-  const std::vector<size_t> kCacheInputIndexes;
-  const std::vector<size_t> kCacheOutputIndexes;
-
   // Cache
   TensorShape mCacheShape;
   const LLMType kCacheType;
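
Note: within this commit, the only visible consumers of kRotEmbInputIndexes, kCacheInputIndexes, and kCacheOutputIndexes were the GetExpectedInputCount()/GetExpectedOutputCount() helpers deleted from LlamaModelChunk.cpp above, so the declarations are removed together with kMaskInputIndex.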

examples/mediatek/executor_runner/llama_runner/LlamaRuntime.cpp

Lines changed: 3 additions & 0 deletions
@@ -58,6 +58,9 @@ void LlamaRuntime::Initialize(
       usePromptModel ? modelOptions.prompt_token_batch_size : 1;
   mTokenBatchSize = initBatchSize;
 
+  // Enable SWA if window size is not 0
+  const bool enableSWA = (modelOptions.window_size != 0);
+
   // Get effective prompt and gen model paths
   const auto& [prompt_model_paths, gen_model_paths] = [&] {
     if (useSharedWeights) {
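
This hunk only derives enableSWA from modelOptions.window_size; the code that consumes the flag lies outside the diff. For orientation, here is a generic sliding-window-attention mask sketch under textbook SWA semantics, explicitly not LlamaRuntime's actual masking code: with the flag set, each token attends to at most windowSize trailing positions instead of its full causal history.

// Sketch only (assumed, textbook SWA semantics). With enableSWA false this
// is a plain causal mask; with it true, attention is clipped to a trailing
// window, which is what a nonzero modelOptions.window_size opts into.
#include <cstddef>
#include <vector>

std::vector<std::vector<bool>> BuildCausalMaskSketch(
    std::size_t seqLen,
    bool enableSWA,
    std::size_t windowSize) {
  std::vector<std::vector<bool>> mask(
      seqLen, std::vector<bool>(seqLen, false));
  for (std::size_t i = 0; i < seqLen; ++i) {
    // Lowest key index token i may attend to (i+1 guards unsigned underflow).
    const std::size_t lo =
        (enableSWA && i + 1 > windowSize) ? i + 1 - windowSize : 0;
    for (std::size_t j = lo; j <= i; ++j) {
      mask[i][j] = true;  // token i attends to position j
    }
  }
  return mask;
}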

examples/mediatek/model_export_scripts/llama.py

Lines changed: 1 addition & 1 deletion
@@ -376,7 +376,7 @@ def export_to_et_ir(
         CompileSpec("gno-exp", b""),
         CompileSpec("gno-non-4d-tiling", b""),
         CompileSpec("ImportForever", struct.pack("?", True)),
-        CompileSpec("platform-config", b"mt6989"),
+        CompileSpec("platform-config", platform_b),
         CompileSpec("ExtractSharedBlobKey", model_shared_key_name.encode()),
     ]
     method_to_partitioner[f"{model_fname}"] = NeuropilotPartitioner(compile_spec)
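
Note: platform_b is presumably a bytes value carrying the target platform name, defined earlier in llama.py and not visible in this hunk (for example, something like args.platform.encode()); the change replaces the hardcoded b"mt6989" so the export is no longer pinned to a single MediaTek platform. This reading is an assumption from the diff alone.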
