add model member function to build mtp graph, to be called from speculative.cpp

F1LM1 · F1LM1 · commit 03231da69eec · 2025-08-12T01:03:59.000-04:00
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
@@ -18673,6 +18673,22 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
     return llm->res->get_gf();
 }
 
+// build the MTP graph for this model's arch; only LLM_ARCH_GLM4_MOE has a builder, any other arch hits GGML_ABORT
+ggml_cgraph * llama_model::build_mtp_graph(const llm_graph_params & params,
+        ggml_tensor * hidden_state_inp, llama_token last_token_id, int n_past) const {
+    std::unique_ptr<llm_graph_context> llm;
+
+    switch (arch) {
+        case LLM_ARCH_GLM4_MOE:
+            llm = std::make_unique<llm_build_glm4_moe_mtp>(*this, params, hidden_state_inp, last_token_id, n_past);
+            break;
+        default:
+            GGML_ABORT("fatal error"); // no MTP graph builder is wired up for this architecture
+    }
+
+    return llm->res->get_gf();
+}
+
 //
 // interface implementation
 //
diff --git a/src/llama-model.h b/src/llama-model.h
@@ -475,6 +475,8 @@ struct llama_model {
 
     // TODO: move this to new llm_arch_model_i interface
     ggml_cgraph * build_graph(const llm_graph_params & params) const;
+    ggml_cgraph * build_mtp_graph(const llm_graph_params & params,
+        ggml_tensor * hidden_state_inp, llama_token last_token_id, int n_past) const; // MTP graph builder, defined in src/llama-model.cpp
 
 private:
     struct impl;