
Commit 107d16e

Updating code to enable mid-epoch cancellation
1 parent e432569 commit 107d16e

3 files changed: +71, -0 lines changed

include/llama.h

Lines changed: 8 additions & 0 deletions

@@ -1412,6 +1412,14 @@ extern "C" {
     // Call this before calling llama_opt_init() again on the same context
     LLAMA_API void llama_opt_cleanup(struct llama_context * lctx);
 
+    // Request early exit from training epoch (thread-safe)
+    // Call this from a callback or another thread to stop training after the current batch
+    LLAMA_API void llama_opt_request_stop(struct llama_context * lctx);
+
+    // Reset the stop flag to allow training to continue
+    // Call this before resuming training after a pause
+    LLAMA_API void llama_opt_reset_stop(struct llama_context * lctx);
+
     // LoRA training parameters
     enum llama_lora_target_module {
         LLAMA_LORA_TARGET_ATTN_Q = 1 << 0,
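
As a usage sketch (not part of this commit): one way to drive the new public API is to run the training epoch on one thread and request the stop from a watcher thread. Only llama_opt_request_stop() and llama_opt_reset_stop() come from this change; the surrounding scaffolding below is illustrative.

// Illustrative only: stop a running epoch from a second thread.
#include <chrono>
#include <thread>

#include "llama.h"

void train_with_watchdog(struct llama_context * ctx) {
    // Watcher thread: request a stop after some condition (here, a fixed delay).
    std::thread watcher([ctx] {
        std::this_thread::sleep_for(std::chrono::minutes(5));
        llama_opt_request_stop(ctx);   // thread-safe; the epoch exits after the current batch
    });

    // ... llama_opt_init(...) and llama_opt_epoch(...) run here on this thread;
    // once the flag is set, the epoch returns early instead of finishing.

    watcher.join();

    // Before resuming training later in the same context, clear the flag explicitly
    // (llama_opt_epoch() also resets it at the start of each epoch).
    llama_opt_reset_stop(ctx);
}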

src/llama-context.cpp

Lines changed: 53 additions & 0 deletions

@@ -2138,6 +2138,10 @@ void llama_context::opt_epoch_iter(
     memory->clear(true);
 
     for (uint32_t pos_ctx = 0; pos_ctx < n_ctx; pos_ctx += n_batch) {
+        // Check for early exit request before processing context batch
+        if (training_should_stop.load(std::memory_order_acquire)) {
+            return;
+        }
         batch.n_tokens = n_batch;
         for (uint32_t pos_batch = 0; pos_batch < n_batch; ++pos_batch) {
             batch.token [pos_batch] = tokens[pos_ctx + pos_batch];
@@ -2174,6 +2178,11 @@ void llama_context::opt_epoch_iter(
 
         uint32_t pos_batch = 0;
         do {
+            // Check for early exit request before processing ubatch
+            if (training_should_stop.load(std::memory_order_acquire)) {
+                break;
+            }
+
             const auto & ubatch = mctx->get_ubatch();
 
             n_outputs = ubatch.n_tokens;
@@ -2263,6 +2272,11 @@ void llama_context::opt_epoch_iter(
             ggml_free(ctx_compute_opt);
 
             pos_batch += ubatch.n_tokens;
+
+            // Check for early exit request after processing ubatch
+            if (training_should_stop.load(std::memory_order_acquire)) {
+                break;
+            }
         } while (mctx->next());
     }
 }
@@ -2275,6 +2289,9 @@ void llama_context::opt_epoch(
         ggml_opt_epoch_callback callback_train,
         ggml_opt_epoch_callback callback_eval,
         int64_t resume_from_batch) {
+    // Reset stop flag at the start of each epoch to ensure clean state
+    training_should_stop.store(false, std::memory_order_release);
+
     const uint32_t n_ctx = this->n_ctx();
     const uint32_t n_batch = std::min(cparams.n_batch, n_ctx);
     const uint32_t n_ubatch = std::min(cparams.n_ubatch, n_batch);
@@ -2295,6 +2312,11 @@ void llama_context::opt_epoch(
     int64_t t_loop_start = ggml_time_us();
     int64_t ndata_in_loop = idata_split*ubatch_per_ctx;
     for (; idata < idata_split; ++idata) {
+        // Check for early exit request before processing batch
+        if (training_should_stop.load(std::memory_order_acquire)) {
+            break;
+        }
+
         constexpr bool train = true;
         const int64_t idata_in_loop = idata*ubatch_per_ctx;
 
@@ -2306,11 +2328,21 @@ void llama_context::opt_epoch(
         }
         opt_epoch_iter(dataset, result_train, tokens, labels_sparse, masks_sparse, batch,
                        callback_train, train, idata_in_loop, ndata_in_loop, t_loop_start);
+
+        // Check again after iteration in case it was set during processing
+        if (training_should_stop.load(std::memory_order_acquire)) {
+            break;
+        }
     }
 
     t_loop_start = ggml_time_us();
     ndata_in_loop = (ndata - idata_split)*ubatch_per_ctx;
     for (; idata < ndata; ++idata) {
+        // Check for early exit request before processing batch
+        if (training_should_stop.load(std::memory_order_acquire)) {
+            break;
+        }
+
         constexpr bool train = false;
         const int64_t idata_in_loop = (idata - idata_split)*ubatch_per_ctx;
 
@@ -2321,6 +2353,11 @@ void llama_context::opt_epoch(
         }
         opt_epoch_iter(dataset, result_eval, tokens, labels_sparse, masks_sparse, batch,
                        callback_eval, train, idata_in_loop, ndata_in_loop, t_loop_start);
+
+        // Check again after iteration in case it was set during processing
+        if (training_should_stop.load(std::memory_order_acquire)) {
+            break;
+        }
     }
 
     llama_batch_free(batch);
@@ -2357,6 +2394,14 @@ void llama_context::opt_cleanup() {
     }
 }
 
+void llama_context::opt_request_stop() {
+    training_should_stop.store(true, std::memory_order_release);
+}
+
+void llama_context::opt_reset_stop() {
+    training_should_stop.store(false, std::memory_order_release);
+}
+
 //
 // interface implementation
 //
@@ -2928,3 +2973,11 @@ bool llama_opt_load_state(struct llama_context * ctx, const char* filename) {
 void llama_opt_cleanup(struct llama_context * ctx) {
     ctx->opt_cleanup();
 }
+
+void llama_opt_request_stop(struct llama_context * ctx) {
+    ctx->opt_request_stop();
+}
+
+void llama_opt_reset_stop(struct llama_context * ctx) {
+    ctx->opt_reset_stop();
+}
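
The implementation is a standard cooperative-cancellation pattern: llama_opt_request_stop() publishes the request with a release store, and the epoch loops poll the flag with acquire loads at batch and ubatch boundaries, so a stop only takes effect once the work in flight has finished. A standalone sketch of the same pattern (illustrative, not llama.cpp code) is below.

// One thread sets an atomic flag with memory_order_release; the worker polls it with
// memory_order_acquire at the top of each batch, so cancellation is observed only at
// batch boundaries.
#include <atomic>
#include <chrono>
#include <cstdio>
#include <thread>

static std::atomic<bool> should_stop{false};

static void run_epoch(int n_batches) {
    for (int i = 0; i < n_batches; ++i) {
        if (should_stop.load(std::memory_order_acquire)) {
            std::printf("stopping early before batch %d\n", i);
            return;                                                     // mirrors the early return/break in opt_epoch_iter()
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(50));     // stand-in for real batch work
    }
}

int main() {
    should_stop.store(false, std::memory_order_release);                // like the reset at epoch start
    std::thread trainer(run_epoch, 1000);

    std::this_thread::sleep_for(std::chrono::milliseconds(200));
    should_stop.store(true, std::memory_order_release);                 // like llama_opt_request_stop()

    trainer.join();
    return 0;
}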

src/llama-context.h

Lines changed: 10 additions & 0 deletions

@@ -8,6 +8,7 @@
 #include "ggml-cpp.h"
 #include "ggml-opt.h"
 
+#include <atomic>
 #include <map>
 #include <vector>
 
@@ -169,6 +170,12 @@ struct llama_context {
 
     // Clean up optimizer context to free memory and allow reinitialization
     void opt_cleanup();
+
+    // Request early exit from training epoch (thread-safe)
+    void opt_request_stop();
+
+    // Reset the stop flag to allow training to continue
+    void opt_reset_stop();
 
     void opt_epoch_iter(
             ggml_opt_dataset_t dataset,
@@ -280,6 +287,9 @@ struct llama_context {
     bool should_load_optimizer_tensors = false;
     bool optimizer_tensors_loaded = false;
     ggml_opt_loss_type opt_loss_type = GGML_OPT_LOSS_TYPE_CROSS_ENTROPY;
+
+    // early exit flag for training epochs (thread-safe)
+    std::atomic<bool> training_should_stop{false};
 
     ggml_threadpool_t threadpool = nullptr;
     ggml_threadpool_t threadpool_batch = nullptr;
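
Because training_should_stop is a member of llama_context rather than a global, each context can be cancelled independently. A hypothetical fragment (context creation elided; only the two new API calls come from this commit):

// Hypothetical: two independently initialized training contexts; stopping one leaves
// the other running, since each llama_context carries its own stop flag.
#include "llama.h"

void stop_only_first(struct llama_context * ctx_a, struct llama_context * ctx_b) {
    llama_opt_request_stop(ctx_a);   // only ctx_a's training_should_stop flag is set
    (void) ctx_b;                    // ctx_b keeps training; its flag is untouched
}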
