Metal: mlock model weights in memory (#170)

Maratyszcza · web-flow · commit 7f3c896dad67 · 2025-09-02T23:16:12.000-07:00
diff --git a/gpt_oss/metal/source/include/internal/model.h b/gpt_oss/metal/source/include/internal/model.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <stdatomic.h>
+#include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
 
@@ -54,6 +55,8 @@ struct gptoss_model {
     // Once the batch size is reached, we process it to fill the KV cache.
     size_t max_batch_tokens;
 
+    bool lock_memory;
+
     size_t weights_size;
     size_t allocation_size;
 
diff --git a/gpt_oss/metal/source/model.c b/gpt_oss/metal/source/model.c
@@ -290,6 +290,12 @@ enum gptoss_status GPTOSS_ABI gptoss_model_create_from_file(
 
     prefetch_fd(fd, model_mapping_start, model_mapping_size, path);
 
+    if (mlock(model_mapping_ptr, model_mapping_size) != 0) {
+        GPTOSS_LOG_WARNING("mlock(%s, size=%zu) failed with error %d", path, model_mapping_size, errno);
+    } else {
+        model->lock_memory = true;
+    }
+
     // Initialize Metal
     status = gptoss_metal_device_create_system_default(&model->device);
     if (status != gptoss_status_success) {
@@ -497,6 +503,12 @@ enum gptoss_status GPTOSS_ABI gptoss_model_release(
             // Weight buffers
 
             if (model->mapping_ptr != NULL && model->mapping_size != 0) {
+                if (model->lock_memory) {
+                    if (munlock(model->mapping_ptr, model->mapping_size) != 0) {
+                        GPTOSS_LOG_WARNING("munlock for model weight mapping failed with error %d", errno);
+                    }
+                }
+
                 if (munmap(model->mapping_ptr, model->mapping_size) != 0) {
                     GPTOSS_LOG_WARNING("munmap for model weight mapping failed with error %d", errno);
                 }