char_buff_stream

jesusmb1995 · jesusmb1995 · commit 959d495fd46d · 2025-09-19T13:02:25.000+02:00
diff --git a/common_test/load_into_memory.h b/common_test/load_into_memory.h
@@ -47,13 +47,13 @@ std::vector<uint8_t> load_file_into_buffer(const char * const model_path) {
     return buffer;
 }
 
-std::unique_ptr<std::basic_streambuf<uint8_t>> load_file_into_streambuf(const char * const model_path) {
+std::unique_ptr<std::basic_streambuf<char>> load_file_into_streambuf(const char * const model_path) {
     return std::make_unique<Uint8BufferStreamBuf>(load_file_into_buffer(model_path));
 }
 
 struct file_entry {
     std::string                                    path;
-    std::unique_ptr<std::basic_streambuf<uint8_t>> streambuf;
+    std::unique_ptr<std::basic_streambuf<char>>    streambuf;
 };
 
 std::vector<file_entry> load_files_into_streambuf(const char * const model_path) {
diff --git a/ggml/include/gguf.h b/ggml/include/gguf.h
@@ -200,7 +200,7 @@ extern "C" {
 }
 #endif
 
-#ifdef __cplusplus
+#if defined(__cplusplus) && __cplusplus >= 201703L
 #include <ios>
-GGML_API struct gguf_context * gguf_init_from_buffer(std::basic_streambuf<uint8_t>& streambuf, struct gguf_init_params params);
+GGML_API struct gguf_context * gguf_init_from_buffer(std::basic_streambuf<char>& streambuf, struct gguf_init_params params);
 #endif
diff --git a/ggml/include/uint8-buff-stream.h b/ggml/include/uint8-buff-stream.h
@@ -3,167 +3,9 @@
 #include <cstdint>
 #include <cstring>
 #include <iostream>
-#include <streambuf>
+#include <sstream>
 #include <vector>
 
-#ifdef __APPLE__
-#    include <locale>
-
-/// @brief Custom ctype specialization for uint8_t to work around libc++
-/// limitation in macOS
-template <> struct std::ctype<uint8_t> : public std::ctype_base {
-    using char_type = uint8_t;
-    static std::locale::id id;
-
-    ctype() : std::ctype_base() {}
-
-    ctype([[maybe_unused]] const std::locale::facet & other) : std::ctype_base() {}
-
-    ctype & operator=(const ctype & other) {
-        if (this != &other) {
-            std::ctype_base::operator=(other);
-        }
-        return *this;
-    }
-
-    // Required public interface methods
-    bool is(mask m, [[maybe_unused]] char_type c) const {
-        return (m & space) != 0;  // Treat all uint8_t as non-space
-    }
-
-    const char_type * is(const char_type * low, const char_type * high, mask * vec) const {
-        for (; low != high; ++low, ++vec) {
-            *vec = 0;  // No special character properties
-        }
-        return high;
-    }
-
-    const char_type * scan_is(mask m, const char_type * low, const char_type * high) const {
-        for (; low != high; ++low) {
-            if (is(m, *low)) {
-                return low;
-            }
-        }
-        return high;
-    }
-
-    const char_type * scan_not(mask m, const char_type * low, const char_type * high) const {
-        for (; low != high; ++low) {
-            if (!is(m, *low)) {
-                return low;
-            }
-        }
-        return high;
-    }
-
-    char_type toupper(char_type c) const {
-        return c;  // No case conversion for uint8_t
-    }
-
-    const char_type * toupper([[maybe_unused]] char_type * low, const char_type * high) const {
-        return high;  // No case conversion for uint8_t
-    }
-
-    char_type tolower(char_type c) const {
-        return c;  // No case conversion for uint8_t
-    }
-
-    const char_type * tolower([[maybe_unused]] char_type * low, const char_type * high) const {
-        return high;  // No case conversion for uint8_t
-    }
-
-    char_type widen(char c) const { return static_cast<char_type>(c); }
-
-    const char * widen(const char * low, const char * high, char_type * dest) const {
-        for (; low != high; ++low, ++dest) {
-            *dest = static_cast<char_type>(*low);
-        }
-        return high;
-    }
-
-    char narrow(char_type c, [[maybe_unused]] char dfault) const { return static_cast<char>(c); }
-
-    const char_type * narrow(const char_type * low, const char_type * high, [[maybe_unused]] char dfault,
-                             char * dest) const {
-        for (; low != high; ++low, ++dest) {
-            *dest = static_cast<char>(*low);
-        }
-        return high;
-    }
-};
-#endif
-
-/// @brief Custom traits for uint8_t for usage in std template classes that use char_traits (e.g. std::basic_streambuf)
-template <> struct std::char_traits<uint8_t> {
-    using char_type  = uint8_t;
-    using int_type   = int;
-    using off_type   = std::streamoff;
-    using pos_type   = std::streampos;
-    using state_type = std::mbstate_t;
-
-    static void assign(char_type & c1, const char_type & c2) noexcept { c1 = c2; }
-
-    static constexpr bool eq(char_type a, char_type b) noexcept { return a == b; }
-
-    static constexpr bool lt(char_type a, char_type b) noexcept { return a < b; }
-
-    static int compare(const char_type * s1, const char_type * s2, std::size_t n) {
-        for (std::size_t i = 0; i < n; ++i) {
-            if (lt(s1[i], s2[i])) {
-                return -1;
-            }
-            if (lt(s2[i], s1[i])) {
-                return 1;
-            }
-        }
-        return 0;
-    }
-
-    static std::size_t length(const char_type * s) {
-        std::size_t i = 0;
-        while (!eq(s[i], char_type())) {
-            ++i;
-        }
-        return i;
-    }
-
-    static const char_type * find(const char_type * s, std::size_t n, const char_type & c) {
-        for (std::size_t i = 0; i < n; ++i) {
-            if (eq(s[i], c)) {
-                return s + i;
-            }
-        }
-        return nullptr;
-    }
-
-    static char_type * move(char_type * s1, const char_type * s2, std::size_t n) {
-        return static_cast<char_type *>(std::memmove(s1, s2, n));
-    }
-
-    static char_type * copy(char_type * s1, const char_type * s2, std::size_t n) {
-        return static_cast<char_type *>(std::memcpy(s1, s2, n));
-    }
-
-    static char_type * assign(char_type * s, std::size_t n, char_type c) {
-        for (std::size_t i = 0; i < n; ++i) {
-            s[i] = c;
-        }
-        return s;
-    }
-
-    static constexpr int_type not_eof(int_type c) noexcept { return eq_int_type(c, eof()) ? 0 : c; }
-
-    static constexpr char_type to_char_type(int_type c) noexcept {
-        return c >= 0 && c <= 255 ? static_cast<char_type>(c) : char_type();
-    }
-
-    static constexpr int_type to_int_type(char_type c) noexcept { return static_cast<int_type>(c); }
-
-    static constexpr bool eq_int_type(int_type c1, int_type c2) noexcept { return c1 == c2; }
-
-    static constexpr int_type eof() noexcept { return static_cast<int_type>(-1); }
-};
-
 #ifdef GGML_SHARED
 #    if defined(_WIN32) && !defined(__MINGW32__)
 #        ifdef GGML_BUILD
@@ -178,8 +20,10 @@ template <> struct std::char_traits<uint8_t> {
 #    define GGML_CLASS_API
 #endif
 
-/// @brief Custom streambuf for uint8_t
-class GGML_CLASS_API Uint8BufferStreamBuf : public std::basic_streambuf<uint8_t> {
+/// @brief Custom basic_streambuf<char> over uint8_t input data; it owns the underlying data.
+/// @note basic_streambuf<char> is supported on more platforms than basic_streambuf<uint8_t>,
+/// which is missing on some platforms (e.g. macOS, newer NDKs). C++17 provides additional guarantees for char.
+class GGML_CLASS_API Uint8BufferStreamBuf : public std::basic_streambuf<char> {
   public:
     Uint8BufferStreamBuf(std::vector<uint8_t> && _data);
 
diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp
@@ -236,13 +236,13 @@ struct gguf_bytes_reader {
 gguf_bytes_reader::~gguf_bytes_reader() {}
 
 struct gguf_bytes_buffer_reader : public gguf_bytes_reader {
-    gguf_bytes_buffer_reader(std::basic_streambuf<uint8_t> & streambuf) : streambuf(streambuf), offset(0) {}
+    gguf_bytes_buffer_reader(std::basic_streambuf<char> & streambuf) : streambuf(streambuf), offset(0) {}
 
     ~gguf_bytes_buffer_reader() {}
 
     size_t read(void * buffer, size_t size, size_t count) override {
         size_t total_size = size * count;
-        auto   bytes_read = streambuf.sgetn(static_cast<uint8_t *>(buffer), total_size);
+        auto   bytes_read = streambuf.sgetn(static_cast<char*>(buffer), total_size);
         offset += bytes_read;
         return bytes_read;
     }
@@ -260,8 +260,8 @@ struct gguf_bytes_buffer_reader : public gguf_bytes_reader {
     }
 
   private:
-    std::basic_streambuf<uint8_t> & streambuf;
-    size_t                          offset;
+    std::basic_streambuf<char> & streambuf;
+    size_t                       offset;
 };
 
 struct gguf_bytes_file_reader : public gguf_bytes_reader {
@@ -815,7 +815,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
     return result;
 }
 
-struct gguf_context * gguf_init_from_buffer(std::basic_streambuf<uint8_t> & streambuf, struct gguf_init_params params) {
+struct gguf_context * gguf_init_from_buffer(std::basic_streambuf<char> & streambuf, struct gguf_init_params params) {
     gguf_bytes_buffer_reader bytes_reader(streambuf);
     gguf_reader              reader(bytes_reader);
     return gguf_init_from_reader_impl(reader, params);
diff --git a/ggml/src/uint8-buff-stream.cpp b/ggml/src/uint8-buff-stream.cpp
@@ -5,8 +5,9 @@ std::locale::id std::ctype<uint8_t>::id;
 #endif
 
 Uint8BufferStreamBuf::Uint8BufferStreamBuf(std::vector<uint8_t> && _data) : data(std::move(_data)) {
-    setg(const_cast<uint8_t *>(data.data()), const_cast<uint8_t *>(data.data()),
-         const_cast<uint8_t *>(data.data()) + data.size());
+    // View the uint8_t storage as char* for basic_streambuf<char>; this is safe because char may alias any object type
+    char* start = reinterpret_cast<char*>(data.data());
+    setg(start, start, start + data.size());
 }
 
 Uint8BufferStreamBuf::int_type Uint8BufferStreamBuf::underflow() {
diff --git a/include/llama-cpp.h b/include/llama-cpp.h
@@ -33,4 +33,4 @@ typedef std::unique_ptr<llama_adapter_lora, llama_adapter_lora_deleter> llama_ad
 LLAMA_API struct llama_model * llama_model_load_from_buffer(std::vector<uint8_t> &&   data,
                                                             struct llama_model_params params);
 LLAMA_API bool                 llama_model_load_fulfill_split_future(const char * path, const char * context,
-                                                                     std::unique_ptr<std::basic_streambuf<uint8_t>> && streambuf);
+                                                                     std::unique_ptr<std::basic_streambuf<char>> && streambuf);
diff --git a/src/llama-mmap.cpp b/src/llama-mmap.cpp
@@ -1,7 +1,6 @@
 #include "llama-mmap.h"
 
 #include "llama-impl.h"
-#include "uint8-buff-stream.h"
 
 #include "ggml.h"
 
@@ -11,6 +10,7 @@
 #include <cerrno>
 #include <algorithm>
 #include <map>
+#include <streambuf>
 
 #ifdef __has_include
     #if __has_include(<unistd.h>)
@@ -268,7 +268,7 @@ void llama_file_disk::write_raw(const void * ptr, size_t len) const { pimpl->wri
 void llama_file_disk::write_u32(uint32_t val) const { pimpl->write_u32(val); }
 
 template <bool Writable>
-llama_file_buffer<Writable>::llama_file_buffer(std::unique_ptr<std::basic_streambuf<uint8_t>> && streambuf) :
+llama_file_buffer<Writable>::llama_file_buffer(std::unique_ptr<std::basic_streambuf<char>> && streambuf) :
     streambuf(std::move(streambuf)) {}
 
 template <bool Writable> llama_file_buffer<Writable>::~llama_file_buffer() = default;
@@ -301,7 +301,7 @@ template <bool Writable> void llama_file_buffer<Writable>::seek(size_t offset, i
 }
 
 template <bool Writable> void llama_file_buffer<Writable>::read_raw(void * ptr, size_t len) const {
-    auto bytes_read = streambuf->sgetn(static_cast<uint8_t *>(ptr), len);
+    auto bytes_read = streambuf->sgetn(static_cast<char *>(ptr), len);
     if (bytes_read != static_cast<std::streamsize>(len)) {
         throw std::runtime_error("read beyond end of buffer");
     }
@@ -327,7 +327,7 @@ template <> void llama_file_buffer<false>::write_u32(uint32_t val) const {
 }
 
 template <> void llama_file_buffer<true>::write_raw(const void * ptr, size_t len) const {
-    auto bytes_written = streambuf->sputn(static_cast<const uint8_t *>(ptr), len);
+    auto bytes_written = streambuf->sputn(static_cast<const char *>(ptr), len);
     if (bytes_written != static_cast<std::streamsize>(len)) {
         throw std::runtime_error("write beyond end of buffer");
     }
diff --git a/src/llama-mmap.h b/src/llama-mmap.h
@@ -3,7 +3,6 @@
 #include <cstdint>
 #include <memory>
 #include <vector>
-#include "uint8-buff-stream.h"
 #include <future>
 #include <string>
 #include <map>
@@ -54,7 +53,9 @@ struct llama_file_disk : public llama_file {
 };
 
 template <bool Writable> struct llama_file_buffer : public llama_file {
-    llama_file_buffer(std::unique_ptr<std::basic_streambuf<uint8_t>> && streambuf);
+    /// @note Uses char for the streambuf because not all platforms support a uint8_t specialization (e.g. macOS or newer NDKs).
+    ///       Since C++17 there are guarantees that make it safe to access binary data through char.
+    llama_file_buffer(std::unique_ptr<std::basic_streambuf<char>> && streambuf);
 
     ~llama_file_buffer() override;
 
@@ -75,7 +76,7 @@ template <bool Writable> struct llama_file_buffer : public llama_file {
     /// @throw std::runtime_error if the buffer is read-only
     void write_u32(uint32_t val) const override;
 
-    std::unique_ptr<std::basic_streambuf<uint8_t>> streambuf;
+    std::unique_ptr<std::basic_streambuf<char>> streambuf;
 };
 
 template <bool Writable> struct llama_future_file_buffer {
diff --git a/src/llama-model-load-input.cpp b/src/llama-model-load-input.cpp
@@ -1,5 +1,7 @@
 #include "llama-model-load-input.h"
+
 #include <sstream>
+
 #include "llama-mmap.h"
 
 namespace load_input_variant {
@@ -44,17 +46,10 @@ std::optional<std::set<std::string>> parse_tensor_list_from_future(load_input_t
     llama_future_file_buffer_ro           tensor_file(future_input.tensor_list_file, future_input.context);
     std::unique_ptr<llama_file_buffer_ro> file_buffer = tensor_file.extract();
 
-    // Read the entire buffer as bytes and convert to string
-    std::vector<uint8_t>              buffer;
-    std::basic_istream<uint8_t>       stream(file_buffer->streambuf.get());
-    std::istreambuf_iterator<uint8_t> begin(stream), end;
-    buffer.assign(begin, end);
-
-    // Convert bytes to string and split by newlines
-    std::string        content(reinterpret_cast<const char *>(buffer.data()), buffer.size());
-    std::istringstream line_stream(content);
-    std::string        line;
-    while (std::getline(line_stream, line)) {
+    // Read directly from the stream
+    std::basic_istream<char> stream(file_buffer->streambuf.get());
+    std::string              line;
+    while (std::getline(stream, line)) {
         tensor_names.insert(line);
     }
 
diff --git a/src/llama-model-load-input.h b/src/llama-model-load-input.h
@@ -17,7 +17,7 @@ struct fname_load_input {
 };
 
 struct buffer_load_input {
-    std::unique_ptr<std::basic_streambuf<uint8_t>> & streambuf;
+    std::unique_ptr<std::basic_streambuf<char>> & streambuf;
 };
 
 struct buffer_future_load_input {
diff --git a/src/llama.cpp b/src/llama.cpp
@@ -268,7 +268,7 @@ void override_and_disable_mmap(struct llama_model_params & params) {
 }  // namespace
 
 struct llama_model * llama_model_load_from_buffer(std::vector<uint8_t> && data, struct llama_model_params params) {
-    std::unique_ptr<std::basic_streambuf<uint8_t>> streambuf = std::make_unique<Uint8BufferStreamBuf>(std::move(data));
+    std::unique_ptr<std::basic_streambuf<char>> streambuf = std::make_unique<Uint8BufferStreamBuf>(std::move(data));
     override_and_disable_mmap(params);
     llama_model_loader ml(load_input_variant::buffer_load_input{ streambuf }, params.use_mmap, params.check_tensors,
                           params.kv_overrides, params.tensor_buft_overrides);
@@ -316,7 +316,7 @@ struct llama_model * llama_model_load_from_split_futures(const char ** paths, si
 }
 
 bool llama_model_load_fulfill_split_future(const char * path, const char * context,
-                                           std::unique_ptr<std::basic_streambuf<uint8_t>> && streambuf) {
+                                           std::unique_ptr<std::basic_streambuf<char>> && streambuf) {
     return llama_future_file_buffer_ro::fulfill_promise(path, context,
                                                         std::make_unique<llama_file_buffer_ro>(std::move(streambuf)));
 }

Original file line number	Diff line number	Diff line change
`@@ -47,13 +47,13 @@ std::vector<uint8_t> load_file_into_buffer(const char * const model_path) {`
`47`	`47`	`return buffer;`
`48`	`48`	`}`
`49`	`49`
`50`		`-std::unique_ptr<std::basic_streambuf<uint8_t>> load_file_into_streambuf(const char * const model_path) {`
	`50`	`+std::unique_ptr<std::basic_streambuf<char>> load_file_into_streambuf(const char * const model_path) {`
`51`	`51`	`return std::make_unique<Uint8BufferStreamBuf>(load_file_into_buffer(model_path));`
`52`	`52`	`}`
`53`	`53`
`54`	`54`	`struct file_entry {`
`55`	`55`	`std::string path;`
`56`		`- std::unique_ptr<std::basic_streambuf<uint8_t>> streambuf;`
	`56`	`+ std::unique_ptr<std::basic_streambuf<char>> streambuf;`
`57`	`57`	`};`
`58`	`58`
`59`	`59`	`std::vector<file_entry> load_files_into_streambuf(const char * const model_path) {`
Original file line number	Diff line number	Diff line change
`@@ -200,7 +200,7 @@ extern "C" {`
`200`	`200`	`}`
`201`	`201`	`#endif`
`202`	`202`
`203`		`-#ifdef __cplusplus`
	`203`	`+#if defined(__cplusplus) && __cplusplus >= 201703L`
`204`	`204`	`#include <ios>`
`205`		`-GGML_API struct gguf_context * gguf_init_from_buffer(std::basic_streambuf<uint8_t>& streambuf, struct gguf_init_params params);`
	`205`	`+GGML_API struct gguf_context * gguf_init_from_buffer(std::basic_streambuf<char>& streambuf, struct gguf_init_params params);`
`206`	`206`	`#endif`