Commit f8942e7

[common] Pure interface for files
Convert llama_file to a pure virtual class that can be overridden by multiple implementations (disk, single memory buffer, ...).
1 parent 73e53dc commit f8942e7
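
The diffs below introduce the abstract llama_file interface in src/llama-mmap.h and move the existing stdio-backed implementation into llama_file_disk. To illustrate the "single memory buffer" case mentioned above, here is a minimal hypothetical sketch of an in-memory implementation written against the new interface; the class name llama_file_buffer and its read-only behavior are illustrative assumptions, not part of this commit.

// Hypothetical sketch (not in this commit): an in-memory, read-only backing
// for the new llama_file interface, as one possible non-disk implementation.
#include <cstdint>
#include <cstdio>    // SEEK_SET / SEEK_CUR / SEEK_END
#include <cstring>
#include <stdexcept>
#include <vector>

#include "llama-mmap.h" // llama_file interface (see header diff below)

struct llama_file_buffer : public llama_file {
    explicit llama_file_buffer(std::vector<uint8_t> data) : buf(std::move(data)) {}

    size_t tell() const override { return pos; }
    size_t size() const override { return buf.size(); }
    int file_id() const override { return -1; } // no underlying file descriptor

    void seek(size_t offset, int whence) const override {
        if (whence == SEEK_SET)      { pos = offset; }
        else if (whence == SEEK_CUR) { pos += offset; }
        else                         { pos = buf.size() + offset; } // SEEK_END
    }

    void read_raw(void * ptr, size_t len) const override {
        if (pos + len > buf.size()) { throw std::runtime_error("read past end of buffer"); }
        std::memcpy(ptr, buf.data() + pos, len);
        pos += len;
    }

    uint32_t read_u32() const override {
        uint32_t val;
        read_raw(&val, sizeof(val));
        return val;
    }

    // Kept read-only for the purpose of this sketch.
    void write_raw(const void *, size_t) const override { throw std::runtime_error("buffer is read-only"); }
    void write_u32(uint32_t) const override { throw std::runtime_error("buffer is read-only"); }

private:
    std::vector<uint8_t> buf;
    mutable size_t pos = 0; // the interface methods are const, so the cursor is mutable
};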

File tree

  src/llama-adapter.cpp
  src/llama-context.cpp
  src/llama-mmap.cpp
  src/llama-mmap.h
  src/llama-model-loader.cpp

5 files changed: +43 -30 lines

src/llama-adapter.cpp

Lines changed: 1 addition & 1 deletion
@@ -347,7 +347,7 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
 
     // set tensor data
     {
-        llama_file gguf_file(path_lora, "rb");
+        llama_file_disk gguf_file(path_lora, "rb");
         std::vector<uint8_t> read_buf;
         auto set_tensor = [&](ggml_tensor * orig, ggml_tensor * dev) {
             size_t offs = gguf_get_data_offset(ctx_gguf.get()) + gguf_get_tensor_offset(ctx_gguf.get(), gguf_find_tensor(ctx_gguf.get(), orig->name));

src/llama-context.cpp

Lines changed: 4 additions & 4 deletions
@@ -1614,7 +1614,7 @@ size_t llama_context::state_seq_set_data(llama_seq_id seq_id, const uint8_t * sr
 }
 
 bool llama_context::state_load_file(const char * filepath, llama_token * tokens_out, size_t n_token_capacity, size_t * n_token_count_out) {
-    llama_file file(filepath, "rb");
+    llama_file_disk file(filepath, "rb");
 
     // sanity checks
     {
@@ -1657,7 +1657,7 @@ bool llama_context::state_load_file(const char * filepath, llama_token * tokens_
 }
 
 bool llama_context::state_save_file(const char * filepath, const llama_token * tokens, size_t n_token_count) {
-    llama_file file(filepath, "wb");
+    llama_file_disk file(filepath, "wb");
 
     file.write_u32(LLAMA_SESSION_MAGIC);
     file.write_u32(LLAMA_SESSION_VERSION);
@@ -1674,7 +1674,7 @@ bool llama_context::state_save_file(const char * filepath, const llama_token * t
 }
 
 size_t llama_context::state_seq_load_file(llama_seq_id seq_id, const char * filepath, llama_token * tokens_out, size_t n_token_capacity, size_t * n_token_count_out) {
-    llama_file file(filepath, "rb");
+    llama_file_disk file(filepath, "rb");
 
     // version checks
    {
@@ -1717,7 +1717,7 @@ size_t llama_context::state_seq_load_file(llama_seq_id seq_id, const char * file
 }
 
 size_t llama_context::state_seq_save_file(llama_seq_id seq_id, const char * filepath, const llama_token * tokens, size_t n_token_count) {
-    llama_file file(filepath, "wb");
+    llama_file_disk file(filepath, "wb");
 
     file.write_u32(LLAMA_STATE_SEQ_MAGIC);
     file.write_u32(LLAMA_STATE_SEQ_VERSION);

src/llama-mmap.cpp

Lines changed: 11 additions & 13 deletions
@@ -54,9 +54,7 @@ static std::string llama_format_win_err(DWORD err) {
 }
 #endif
 
-// llama_file
-
-struct llama_file::impl {
+struct llama_file_disk::impl {
 #if defined(_WIN32)
     HANDLE fp_win32;
     std::string GetErrorMessageWin32(DWORD error_code) const {
@@ -241,13 +239,13 @@ struct llama_file::impl {
     size_t size;
 };
 
-llama_file::llama_file(const char * fname, const char * mode) : pimpl(std::make_unique<impl>(fname, mode)) {}
-llama_file::~llama_file() = default;
+llama_file_disk::llama_file_disk(const char * fname, const char * mode) : pimpl(std::make_unique<impl>(fname, mode)) {}
+llama_file_disk::~llama_file_disk() = default;
 
-size_t llama_file::tell() const { return pimpl->tell(); }
-size_t llama_file::size() const { return pimpl->size; }
+size_t llama_file_disk::tell() const { return pimpl->tell(); }
+size_t llama_file_disk::size() const { return pimpl->size; }
 
-int llama_file::file_id() const {
+int llama_file_disk::file_id() const {
 #ifdef _WIN32
     return _fileno(pimpl->fp);
 #else
@@ -259,13 +257,13 @@ int llama_file::file_id() const {
 #endif
 }
 
-void llama_file::seek(size_t offset, int whence) const { pimpl->seek(offset, whence); }
-void llama_file::read_raw(void * ptr, size_t len) const { pimpl->read_raw(ptr, len); }
+void llama_file_disk::seek(size_t offset, int whence) const { pimpl->seek(offset, whence); }
+void llama_file_disk::read_raw(void * ptr, size_t len) const { pimpl->read_raw(ptr, len); }
 
-uint32_t llama_file::read_u32() const { return pimpl->read_u32(); }
+uint32_t llama_file_disk::read_u32() const { return pimpl->read_u32(); }
 
-void llama_file::write_raw(const void * ptr, size_t len) const { pimpl->write_raw(ptr, len); }
-void llama_file::write_u32(uint32_t val) const { pimpl->write_u32(val); }
+void llama_file_disk::write_raw(const void * ptr, size_t len) const { pimpl->write_raw(ptr, len); }
+void llama_file_disk::write_u32(uint32_t val) const { pimpl->write_u32(val); }
 
 // llama_mmap

src/llama-mmap.h

Lines changed: 25 additions & 10 deletions
@@ -13,21 +13,36 @@ using llama_mmaps = std::vector<std::unique_ptr<llama_mmap>>;
 using llama_mlocks = std::vector<std::unique_ptr<llama_mlock>>;
 
 struct llama_file {
-    llama_file(const char * fname, const char * mode);
-    ~llama_file();
+    virtual ~llama_file() = default;
 
-    size_t tell() const;
-    size_t size() const;
+    virtual size_t tell() const = 0;
+    virtual size_t size() const = 0;
+    virtual int file_id() const = 0;
+
+    virtual void seek(size_t offset, int whence) const = 0;
+
+    virtual void read_raw(void * ptr, size_t len) const = 0;
+    virtual uint32_t read_u32() const = 0;
+
+    virtual void write_raw(const void * ptr, size_t len) const = 0;
+    virtual void write_u32(uint32_t val) const = 0;
+};
+
+struct llama_file_disk : public llama_file {
+    llama_file_disk(const char * fname, const char * mode);
+    ~llama_file_disk() override;
 
-    int file_id() const; // fileno overload
+    size_t tell() const override;
+    size_t size() const override;
+    int file_id() const override;
 
-    void seek(size_t offset, int whence) const;
+    void seek(size_t offset, int whence) const override;
 
-    void read_raw(void * ptr, size_t len) const;
-    uint32_t read_u32() const;
+    void read_raw(void * ptr, size_t len) const override;
+    uint32_t read_u32() const override;
 
-    void write_raw(const void * ptr, size_t len) const;
-    void write_u32(uint32_t val) const;
+    void write_raw(const void * ptr, size_t len) const override;
+    void write_u32(uint32_t val) const override;
 
 private:
     struct impl;
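
With the interface split out as above, code that only reads or writes through a llama_file no longer needs to know the concrete backing. A small hypothetical usage sketch (the helper peek_magic and the path "model.gguf" are placeholders, not from this commit):

// Hypothetical usage: read the leading 4-byte magic through the abstract
// interface; works with llama_file_disk and any future implementation.
#include <cstdint>
#include <cstdio>   // SEEK_SET, std::printf
#include <memory>

#include "llama-mmap.h" // llama_file / llama_file_disk as declared above

static uint32_t peek_magic(const llama_file & f) {
    f.seek(0, SEEK_SET); // rewind to the start of the file
    return f.read_u32(); // read the magic via the interface
}

int main() {
    // "model.gguf" is a placeholder path.
    std::unique_ptr<llama_file> f = std::make_unique<llama_file_disk>("model.gguf", "rb");
    std::printf("magic: 0x%08x\n", peek_magic(*f));
    return 0;
}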

src/llama-model-loader.cpp

Lines changed: 2 additions & 2 deletions
@@ -500,7 +500,7 @@ llama_model_loader::llama_model_loader(
     get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false);
     llm_kv = LLM_KV(llm_arch_from_string(arch_name));
 
-    files.emplace_back(new llama_file(fname.c_str(), "rb"));
+    files.emplace_back(new llama_file_disk(fname.c_str(), "rb"));
     contexts.emplace_back(ctx);
 
     // Save tensors data offset of the main file.
@@ -568,7 +568,7 @@ llama_model_loader::llama_model_loader(
             }
         }
 
-        files.emplace_back(new llama_file(fname_split, "rb"));
+        files.emplace_back(new llama_file_disk(fname_split, "rb"));
         contexts.emplace_back(ctx);
 
         // Save tensors data offset info of the shard.
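
For context on ownership at these call sites: the loader keeps each open file behind an owning pointer to the base class, so the concrete type is named only where the file is opened. A condensed sketch of that pattern, assuming the llama_files alias from llama-mmap.h is std::vector<std::unique_ptr<llama_file>>:

// Condensed, assumed sketch of the loader's ownership pattern; the alias
// llama_files is taken to be std::vector<std::unique_ptr<llama_file>>.
#include <memory>
#include <string>
#include <vector>

#include "llama-mmap.h" // llama_file, llama_file_disk

using llama_files = std::vector<std::unique_ptr<llama_file>>;

// Open every shard as a disk-backed file; downstream code (mmap, tensor
// loading) only ever sees the llama_file interface.
static void open_files(llama_files & files, const std::vector<std::string> & paths) {
    for (const auto & p : paths) {
        files.emplace_back(new llama_file_disk(p.c_str(), "rb"));
    }
}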
