Skip to content

Commit 649649f

Browse files
authored
feat: qwen3 moe vulkan support. (#258)
1 parent 64f52bf commit 649649f

25 files changed

+1157
-380
lines changed

src/llm.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -621,6 +621,7 @@ void loadLlmNetWeight(const char *path, LlmNet *net, NnRootWeightLoader *loader)
621621
printf("💿 Loading weights...\n");
622622
#endif
623623

624+
Timer timer;
624625
NnByte *data = (NnByte *)file.data;
625626
NnByte *b = &data[net->header->headerSize];
626627
b += loader->loadRoot("embedding", 0, net->tokenEmbeddingSize.nBytes, b);
@@ -651,6 +652,9 @@ void loadLlmNetWeight(const char *path, LlmNet *net, NnRootWeightLoader *loader)
651652

652653
b += loader->loadAll("block_norm_0", layerIndex, net->rmsNormSize.nBytes, b);
653654
b += loader->loadAll("block_norm_1", layerIndex, net->rmsNormSize.nBytes, b);
655+
656+
if (timer.elapsedMiliseconds() > 10000)
657+
printf("💿 Loaded %u/%u\n", layerIndex + 1, net->header->nLayers);
654658
}
655659

656660
b += loader->loadAll("final_norm", 0u, net->rmsNormSize.nBytes, b);

src/nn/nn-core.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -86,8 +86,9 @@ const char *opCodeToString(NnOpCode code) {
8686
if (code == OP_CAST) return "CAST";
8787
if (code == OP_REPEAT_Z) return "REPEAT_Z";
8888
if (code == OP_SHIFT) return "SHIFT";
89+
if (code == OP_SOFTMAX) return "SOFTMAX";
8990
if (code == OP_MOE_GATE) return "MOE_GATE";
90-
throw std::invalid_argument("Unknown op code");
91+
throw std::invalid_argument("Unknown op code: " + std::to_string(code));
9192
}
9293

9394
const char *opQuantTypeToString(NnOpQuantType type) {

src/nn/nn-cpu-ops.cpp

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1149,8 +1149,9 @@ static void matmulForward_F32_F32_F32(NnUint nThreads, NnUint threadIndex, NnUin
11491149
? 0u
11501150
: (NnUint)activeExpertIndexes[y * config->nActiveExperts + e];
11511151

1152+
float *output = (float *)context->output[e * context->outputSize.y + y];
11521153
matmul_F32_F32_F32(
1153-
(float *)context->output[e * context->outputSize.y + y],
1154+
output,
11541155
(float *)context->input[e * context->inputSize.y + y],
11551156
(float *)&context->weight[activeExpertIndex * context->weightSize.nBytesXY],
11561157
context->weightSize.y,
@@ -1176,14 +1177,16 @@ static void matmulForward_Q80_Q40_F32(NnUint nThreads, NnUint threadIndex, NnUin
11761177
? 0u
11771178
: (NnUint)activeExpertIndexes[y * config->nActiveExperts + e];
11781179

1180+
float *output = (float *)context->output[e * context->outputSize.y + y];
11791181
matmul_Q80_Q40_F32(
1180-
(float *)context->output[e * context->outputSize.y + y],
1182+
output,
11811183
(NnBlockQ80 *)context->input[e * context->inputSize.y + y],
11821184
(NnBlockQ40 *)&context->weight[activeExpertIndex * context->weightSize.nBytesXY],
11831185
context->weightSize.y,
11841186
context->weightSize.x,
11851187
nThreads,
11861188
threadIndex);
1189+
DEBUG_VECTOR(context, "output", output);
11871190
}
11881191
}
11891192
}
@@ -1478,9 +1481,13 @@ static void moeGateForward_F32_F32(NnUint nThreads, NnUint threadIndex, NnUint b
14781481
for (NnUint k = 0u; k < config->k; k++) {
14791482
const NnUint p = pos[k];
14801483
indexes[y * config->k + k] = (float)p;
1484+
1485+
// (nActiveExperts, nBatches, 1)
14811486
float *output = (float *)context->output[k * context->outputSize.y + y];
14821487
*output = input[p] / sum;
14831488
}
1489+
1490+
DEBUG_VECTOR(context, "indexes", (&indexes[y * config->k]));
14841491
}
14851492
}
14861493

0 commit comments

Comments
 (0)