
Commit 8e79201

Update llama-context-mmojo.cpp
Signed-off-by: Brad Hutchings <[email protected]>
1 parent b9eb212 · commit 8e79201

File tree: 1 file changed (+10 −1 lines)


src/llama-context-mmojo.cpp

Lines changed: 10 additions & 1 deletion
@@ -113,6 +113,15 @@ llama_context::llama_context(
         }
     }
 
+    {
+        const char * LLAMA_GRAPH_REUSE_DISABLE = getenv("LLAMA_GRAPH_REUSE_DISABLE");
+        graph_reuse_disable = LLAMA_GRAPH_REUSE_DISABLE ? (atoi(LLAMA_GRAPH_REUSE_DISABLE) != 0) : graph_reuse_disable;
+
+        if (graph_reuse_disable) {
+            LLAMA_LOG_WARN("%s: graph reuse disabled\n", __func__);
+        }
+    }
+
     const uint32_t n_ctx_per_seq = cparams.n_ctx / cparams.n_seq_max;
 
     LLAMA_LOG_INFO("%s: n_seq_max = %u\n", __func__, cparams.n_seq_max);

@@ -716,7 +725,7 @@ llm_graph_result * llama_context::process_ubatch(const llama_ubatch & ubatch, ll
     // in order to correctly reuse a graph, it's full topology has to be uniquely determined by these parameters
     const auto gparams = graph_params(res, ubatch, mctx, gtype);
 
-    if (res->can_reuse(gparams)) {
+    if (!graph_reuse_disable && res->can_reuse(gparams)) {
        //LLAMA_LOG_DEBUG("%s: reusing previous graph\n", __func__);
 
        n_reused++;
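
In short, the commit reads LLAMA_GRAPH_REUSE_DISABLE from the environment at context construction time and, when it is set to a non-zero value, skips the res->can_reuse(gparams) fast path so a fresh graph is built for every ubatch. The snippet below is a minimal standalone sketch of that getenv/atoi toggle pattern, not llama.cpp code; main, the stderr logging, and the default value of graph_reuse_disable are illustrative stand-ins.

// Minimal standalone sketch (not llama.cpp code) of the getenv/atoi toggle
// pattern used by the commit. Only LLAMA_GRAPH_REUSE_DISABLE comes from the
// diff; the rest is a stub for illustration.
#include <cstdio>
#include <cstdlib>

int main() {
    bool graph_reuse_disable = false; // assumed default: reuse enabled

    // Unset -> keep the default; "0" (or non-numeric, since atoi yields 0) -> reuse
    // stays enabled; any other integer -> disable reuse.
    const char * env = std::getenv("LLAMA_GRAPH_REUSE_DISABLE");
    graph_reuse_disable = env ? (std::atoi(env) != 0) : graph_reuse_disable;

    if (graph_reuse_disable) {
        std::fprintf(stderr, "%s: graph reuse disabled\n", __func__);
    }

    // The second hunk then gates the reuse fast path on this flag:
    //     if (!graph_reuse_disable && res->can_reuse(gparams)) { ... }
    std::printf("graph_reuse_disable = %d\n", graph_reuse_disable ? 1 : 0);
    return 0;
}

With a build containing this change, running with LLAMA_GRAPH_REUSE_DISABLE=1 in the environment should log the "graph reuse disabled" warning once at startup; because the can_reuse branch in process_ubatch is then skipped, n_reused is never incremented and a new graph is constructed for every ubatch.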
