Commit fb1ea7d

Revert to fix conflicts
1 parent fd52664 · commit fb1ea7d

4 files changed, +19 -7 lines

examples/mediatek/executor_runner/llama_runner/LlamaModelChunk.cpp

Lines changed: 12 additions & 3 deletions
@@ -70,9 +70,6 @@ std::string LlamaModelChunk::SelectMethod(
   const size_t curTokenSize = GetModelId();
   for (const auto& methodName : methodNames) {
     const auto matches = utils::extract_substr(methodName, "([0-9]+)t[0-9]+c");
-    if (matches.empty()) {
-      continue;
-    }
     ET_CHECK_MSG(
         matches.size() == 2, "Invalid method name: %s", methodName.c_str());
     // Extract the first match group as token size
@@ -93,6 +90,18 @@ std::string LlamaModelChunk::SelectMethod(
   return {};
 }

+size_t LlamaModelChunk::GetExpectedInputCount() const {
+  const size_t rotEmbInputCount = kRotEmbInputIndexes.size();
+  const size_t cacheInputCount = kCacheInputIndexes.size();
+  return 2 + rotEmbInputCount + cacheInputCount;
+}
+
+size_t LlamaModelChunk::GetExpectedOutputCount() const {
+  const size_t cacheOutputCount = kCacheOutputIndexes.size();
+  return 1 + cacheOutputCount;
+}
+
+
 void LlamaModelChunk::Initialize() {
   LoadModels();
   GetModelIoInfo();
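
The two new helpers make the chunk's expected I/O arity explicit: two fixed inputs (the mask, per kMaskInputIndex, plus presumably the token input) alongside the rotary-embedding and cache inputs, and one fixed output (presumably the logits) alongside the cache outputs. A minimal sketch of how a caller might use them to sanity-check a loaded method; CheckIoCounts and its count arguments are hypothetical names, and public access to the helpers is assumed:

// Hypothetical validation sketch: only GetExpectedInputCount() and
// GetExpectedOutputCount() come from this commit; everything else is assumed.
#include <executorch/runtime/platform/assert.h>  // for ET_CHECK_MSG

void CheckIoCounts(
    const LlamaModelChunk& chunk,
    const size_t numModelInputs,   // e.g. as reported by the loaded method's metadata
    const size_t numModelOutputs) {
  ET_CHECK_MSG(
      numModelInputs == chunk.GetExpectedInputCount(),
      "Unexpected input count: got %zu, expected %zu",
      numModelInputs,
      chunk.GetExpectedInputCount());
  ET_CHECK_MSG(
      numModelOutputs == chunk.GetExpectedOutputCount(),
      "Unexpected output count: got %zu, expected %zu",
      numModelOutputs,
      chunk.GetExpectedOutputCount());
}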

examples/mediatek/executor_runner/llama_runner/LlamaModelChunk.h

Lines changed: 6 additions & 0 deletions
@@ -144,6 +144,12 @@ class LlamaModelChunk : public ModelChunk {
   // Whether shared weights is used
   bool kIsSharedWeightsUsed = false;

+  // Input/Output Indexes
+  const size_t kMaskInputIndex;
+  const std::vector<size_t> kRotEmbInputIndexes;
+  const std::vector<size_t> kCacheInputIndexes;
+  const std::vector<size_t> kCacheOutputIndexes;
+
   // Cache
   TensorShape mCacheShape;
   const LLMType kCacheType;
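
Because the four new index members are const, they can only be populated in the constructor's member-initializer list. A self-contained sketch of that wiring, assuming the indexes are passed in at construction; this stand-in class and its constructor signature are illustrative only, not part of the commit:

// Minimal stand-in to illustrate initializing the const index members added
// in this commit; the real LlamaModelChunk has many more members (e.g. kCacheType).
#include <cstddef>
#include <utility>
#include <vector>

class LlamaModelChunkSketch {
 public:
  LlamaModelChunkSketch(
      size_t maskInputIndex,
      std::vector<size_t> rotEmbInputIndexes,
      std::vector<size_t> cacheInputIndexes,
      std::vector<size_t> cacheOutputIndexes)
      : kMaskInputIndex(maskInputIndex),
        kRotEmbInputIndexes(std::move(rotEmbInputIndexes)),
        kCacheInputIndexes(std::move(cacheInputIndexes)),
        kCacheOutputIndexes(std::move(cacheOutputIndexes)) {}

 private:
  const size_t kMaskInputIndex;
  const std::vector<size_t> kRotEmbInputIndexes;
  const std::vector<size_t> kCacheInputIndexes;
  const std::vector<size_t> kCacheOutputIndexes;
};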

examples/mediatek/executor_runner/llama_runner/LlamaRuntime.cpp

Lines changed: 0 additions & 3 deletions
@@ -58,9 +58,6 @@ void LlamaRuntime::Initialize(
       usePromptModel ? modelOptions.prompt_token_batch_size : 1;
   mTokenBatchSize = initBatchSize;

-  // Enable SWA if window size is not 0
-  const bool enableSWA = (modelOptions.window_size != 0);
-
   // Get effective prompt and gen model paths
   const auto& [prompt_model_paths, gen_model_paths] = [&] {
     if (useSharedWeights) {

examples/mediatek/model_export_scripts/llama.py

Lines changed: 1 addition & 1 deletion
@@ -376,7 +376,7 @@ def export_to_et_ir(
         CompileSpec("gno-exp", b""),
         CompileSpec("gno-non-4d-tiling", b""),
         CompileSpec("ImportForever", struct.pack("?", True)),
-        CompileSpec("platform-config", platform_b),
+        CompileSpec("platform-config", b"mt6989"),
         CompileSpec("ExtractSharedBlobKey", model_shared_key_name.encode()),
     ]
     method_to_partitioner[f"{model_fname}"] = NeuropilotPartitioner(compile_spec)
