Skip to content

Commit 959d495

Browse files
committed
char_buff_stream
1 parent cb23842 commit 959d495

File tree

11 files changed

+35
-194
lines changed

11 files changed

+35
-194
lines changed

common_test/load_into_memory.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,13 @@ std::vector<uint8_t> load_file_into_buffer(const char * const model_path) {
4747
return buffer;
4848
}
4949

50-
std::unique_ptr<std::basic_streambuf<uint8_t>> load_file_into_streambuf(const char * const model_path) {
50+
std::unique_ptr<std::basic_streambuf<char>> load_file_into_streambuf(const char * const model_path) {
5151
return std::make_unique<Uint8BufferStreamBuf>(load_file_into_buffer(model_path));
5252
}
5353

5454
struct file_entry {
5555
std::string path;
56-
std::unique_ptr<std::basic_streambuf<uint8_t>> streambuf;
56+
std::unique_ptr<std::basic_streambuf<char>> streambuf;
5757
};
5858

5959
std::vector<file_entry> load_files_into_streambuf(const char * const model_path) {

ggml/include/gguf.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ extern "C" {
200200
}
201201
#endif
202202

203-
#ifdef __cplusplus
203+
#if defined(__cplusplus) && __cplusplus >= 201703L
204204
#include <ios>
205-
GGML_API struct gguf_context * gguf_init_from_buffer(std::basic_streambuf<uint8_t>& streambuf, struct gguf_init_params params);
205+
GGML_API struct gguf_context * gguf_init_from_buffer(std::basic_streambuf<char>& streambuf, struct gguf_init_params params);
206206
#endif

ggml/include/uint8-buff-stream.h

Lines changed: 5 additions & 161 deletions
Original file line numberDiff line numberDiff line change
@@ -3,167 +3,9 @@
33
#include <cstdint>
44
#include <cstring>
55
#include <iostream>
6-
#include <streambuf>
6+
#include <sstream>
77
#include <vector>
88

9-
#ifdef __APPLE__
10-
# include <locale>
11-
12-
/// @brief Custom ctype specialization for uint8_t to work around libc++
13-
/// limitation in macOS
14-
template <> struct std::ctype<uint8_t> : public std::ctype_base {
15-
using char_type = uint8_t;
16-
static std::locale::id id;
17-
18-
ctype() : std::ctype_base() {}
19-
20-
ctype([[maybe_unused]] const std::locale::facet & other) : std::ctype_base() {}
21-
22-
ctype & operator=(const ctype & other) {
23-
if (this != &other) {
24-
std::ctype_base::operator=(other);
25-
}
26-
return *this;
27-
}
28-
29-
// Required public interface methods
30-
bool is(mask m, [[maybe_unused]] char_type c) const {
31-
return (m & space) != 0; // Treat all uint8_t as non-space
32-
}
33-
34-
const char_type * is(const char_type * low, const char_type * high, mask * vec) const {
35-
for (; low != high; ++low, ++vec) {
36-
*vec = 0; // No special character properties
37-
}
38-
return high;
39-
}
40-
41-
const char_type * scan_is(mask m, const char_type * low, const char_type * high) const {
42-
for (; low != high; ++low) {
43-
if (is(m, *low)) {
44-
return low;
45-
}
46-
}
47-
return high;
48-
}
49-
50-
const char_type * scan_not(mask m, const char_type * low, const char_type * high) const {
51-
for (; low != high; ++low) {
52-
if (!is(m, *low)) {
53-
return low;
54-
}
55-
}
56-
return high;
57-
}
58-
59-
char_type toupper(char_type c) const {
60-
return c; // No case conversion for uint8_t
61-
}
62-
63-
const char_type * toupper([[maybe_unused]] char_type * low, const char_type * high) const {
64-
return high; // No case conversion for uint8_t
65-
}
66-
67-
char_type tolower(char_type c) const {
68-
return c; // No case conversion for uint8_t
69-
}
70-
71-
const char_type * tolower([[maybe_unused]] char_type * low, const char_type * high) const {
72-
return high; // No case conversion for uint8_t
73-
}
74-
75-
char_type widen(char c) const { return static_cast<char_type>(c); }
76-
77-
const char * widen(const char * low, const char * high, char_type * dest) const {
78-
for (; low != high; ++low, ++dest) {
79-
*dest = static_cast<char_type>(*low);
80-
}
81-
return high;
82-
}
83-
84-
char narrow(char_type c, [[maybe_unused]] char dfault) const { return static_cast<char>(c); }
85-
86-
const char_type * narrow(const char_type * low, const char_type * high, [[maybe_unused]] char dfault,
87-
char * dest) const {
88-
for (; low != high; ++low, ++dest) {
89-
*dest = static_cast<char>(*low);
90-
}
91-
return high;
92-
}
93-
};
94-
#endif
95-
96-
/// @brief Custom traits for uint8_t for usage in std template classes that use char_traits (e.g. std::basic_streambuf)
97-
template <> struct std::char_traits<uint8_t> {
98-
using char_type = uint8_t;
99-
using int_type = int;
100-
using off_type = std::streamoff;
101-
using pos_type = std::streampos;
102-
using state_type = std::mbstate_t;
103-
104-
static void assign(char_type & c1, const char_type & c2) noexcept { c1 = c2; }
105-
106-
static constexpr bool eq(char_type a, char_type b) noexcept { return a == b; }
107-
108-
static constexpr bool lt(char_type a, char_type b) noexcept { return a < b; }
109-
110-
static int compare(const char_type * s1, const char_type * s2, std::size_t n) {
111-
for (std::size_t i = 0; i < n; ++i) {
112-
if (lt(s1[i], s2[i])) {
113-
return -1;
114-
}
115-
if (lt(s2[i], s1[i])) {
116-
return 1;
117-
}
118-
}
119-
return 0;
120-
}
121-
122-
static std::size_t length(const char_type * s) {
123-
std::size_t i = 0;
124-
while (!eq(s[i], char_type())) {
125-
++i;
126-
}
127-
return i;
128-
}
129-
130-
static const char_type * find(const char_type * s, std::size_t n, const char_type & c) {
131-
for (std::size_t i = 0; i < n; ++i) {
132-
if (eq(s[i], c)) {
133-
return s + i;
134-
}
135-
}
136-
return nullptr;
137-
}
138-
139-
static char_type * move(char_type * s1, const char_type * s2, std::size_t n) {
140-
return static_cast<char_type *>(std::memmove(s1, s2, n));
141-
}
142-
143-
static char_type * copy(char_type * s1, const char_type * s2, std::size_t n) {
144-
return static_cast<char_type *>(std::memcpy(s1, s2, n));
145-
}
146-
147-
static char_type * assign(char_type * s, std::size_t n, char_type c) {
148-
for (std::size_t i = 0; i < n; ++i) {
149-
s[i] = c;
150-
}
151-
return s;
152-
}
153-
154-
static constexpr int_type not_eof(int_type c) noexcept { return eq_int_type(c, eof()) ? 0 : c; }
155-
156-
static constexpr char_type to_char_type(int_type c) noexcept {
157-
return c >= 0 && c <= 255 ? static_cast<char_type>(c) : char_type();
158-
}
159-
160-
static constexpr int_type to_int_type(char_type c) noexcept { return static_cast<int_type>(c); }
161-
162-
static constexpr bool eq_int_type(int_type c1, int_type c2) noexcept { return c1 == c2; }
163-
164-
static constexpr int_type eof() noexcept { return static_cast<int_type>(-1); }
165-
};
166-
1679
#ifdef GGML_SHARED
16810
# if defined(_WIN32) && !defined(__MINGW32__)
16911
# ifdef GGML_BUILD
@@ -178,8 +20,10 @@ template <> struct std::char_traits<uint8_t> {
17820
# define GGML_CLASS_API
17921
#endif
18022

181-
/// @brief Custom streambuf for uint8_t
182-
class GGML_CLASS_API Uint8BufferStreamBuf : public std::basic_streambuf<uint8_t> {
23+
/// @brief Custom basic_streambuf<char> over uint8_t input data that owns the underlying data.
24+
/// @note basic_streambuf<char> has more support on different platforms than basic_streambuf<uint8_t>
25+
/// which is missing on some platforms (e.g. macOS, newer NDKs). C++17 provides additional guarantees for char.
26+
class GGML_CLASS_API Uint8BufferStreamBuf : public std::basic_streambuf<char> {
18327
public:
18428
Uint8BufferStreamBuf(std::vector<uint8_t> && _data);
18529

ggml/src/gguf.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -236,13 +236,13 @@ struct gguf_bytes_reader {
236236
gguf_bytes_reader::~gguf_bytes_reader() {}
237237

238238
struct gguf_bytes_buffer_reader : public gguf_bytes_reader {
239-
gguf_bytes_buffer_reader(std::basic_streambuf<uint8_t> & streambuf) : streambuf(streambuf), offset(0) {}
239+
gguf_bytes_buffer_reader(std::basic_streambuf<char> & streambuf) : streambuf(streambuf), offset(0) {}
240240

241241
~gguf_bytes_buffer_reader() {}
242242

243243
size_t read(void * buffer, size_t size, size_t count) override {
244244
size_t total_size = size * count;
245-
auto bytes_read = streambuf.sgetn(static_cast<uint8_t *>(buffer), total_size);
245+
auto bytes_read = streambuf.sgetn(static_cast<char*>(buffer), total_size);
246246
offset += bytes_read;
247247
return bytes_read;
248248
}
@@ -260,8 +260,8 @@ struct gguf_bytes_buffer_reader : public gguf_bytes_reader {
260260
}
261261

262262
private:
263-
std::basic_streambuf<uint8_t> & streambuf;
264-
size_t offset;
263+
std::basic_streambuf<char> & streambuf;
264+
size_t offset;
265265
};
266266

267267
struct gguf_bytes_file_reader : public gguf_bytes_reader {
@@ -815,7 +815,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
815815
return result;
816816
}
817817

818-
struct gguf_context * gguf_init_from_buffer(std::basic_streambuf<uint8_t> & streambuf, struct gguf_init_params params) {
818+
struct gguf_context * gguf_init_from_buffer(std::basic_streambuf<char> & streambuf, struct gguf_init_params params) {
819819
gguf_bytes_buffer_reader bytes_reader(streambuf);
820820
gguf_reader reader(bytes_reader);
821821
return gguf_init_from_reader_impl(reader, params);

ggml/src/uint8-buff-stream.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@ std::locale::id std::ctype<uint8_t>::id;
55
#endif
66

77
Uint8BufferStreamBuf::Uint8BufferStreamBuf(std::vector<uint8_t> && _data) : data(std::move(_data)) {
8-
setg(const_cast<uint8_t *>(data.data()), const_cast<uint8_t *>(data.data()),
9-
const_cast<uint8_t *>(data.data()) + data.size());
8+
// Cast uint8_t* to char* for basic_streambuf<char> - this is safe because char may alias any object type and both are 1-byte types
9+
char* start = reinterpret_cast<char*>(data.data());
10+
setg(start, start, start + data.size());
1011
}
1112

1213
Uint8BufferStreamBuf::int_type Uint8BufferStreamBuf::underflow() {

include/llama-cpp.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,4 @@ typedef std::unique_ptr<llama_adapter_lora, llama_adapter_lora_deleter> llama_ad
3333
LLAMA_API struct llama_model * llama_model_load_from_buffer(std::vector<uint8_t> && data,
3434
struct llama_model_params params);
3535
LLAMA_API bool llama_model_load_fulfill_split_future(const char * path, const char * context,
36-
std::unique_ptr<std::basic_streambuf<uint8_t>> && streambuf);
36+
std::unique_ptr<std::basic_streambuf<char>> && streambuf);

src/llama-mmap.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#include "llama-mmap.h"
22

33
#include "llama-impl.h"
4-
#include "uint8-buff-stream.h"
54

65
#include "ggml.h"
76

@@ -11,6 +10,7 @@
1110
#include <cerrno>
1211
#include <algorithm>
1312
#include <map>
13+
#include <streambuf>
1414

1515
#ifdef __has_include
1616
#if __has_include(<unistd.h>)
@@ -268,7 +268,7 @@ void llama_file_disk::write_raw(const void * ptr, size_t len) const { pimpl->wri
268268
void llama_file_disk::write_u32(uint32_t val) const { pimpl->write_u32(val); }
269269

270270
template <bool Writable>
271-
llama_file_buffer<Writable>::llama_file_buffer(std::unique_ptr<std::basic_streambuf<uint8_t>> && streambuf) :
271+
llama_file_buffer<Writable>::llama_file_buffer(std::unique_ptr<std::basic_streambuf<char>> && streambuf) :
272272
streambuf(std::move(streambuf)) {}
273273

274274
template <bool Writable> llama_file_buffer<Writable>::~llama_file_buffer() = default;
@@ -301,7 +301,7 @@ template <bool Writable> void llama_file_buffer<Writable>::seek(size_t offset, i
301301
}
302302

303303
template <bool Writable> void llama_file_buffer<Writable>::read_raw(void * ptr, size_t len) const {
304-
auto bytes_read = streambuf->sgetn(static_cast<uint8_t *>(ptr), len);
304+
auto bytes_read = streambuf->sgetn(static_cast<char *>(ptr), len);
305305
if (bytes_read != static_cast<std::streamsize>(len)) {
306306
throw std::runtime_error("read beyond end of buffer");
307307
}
@@ -327,7 +327,7 @@ template <> void llama_file_buffer<false>::write_u32(uint32_t val) const {
327327
}
328328

329329
template <> void llama_file_buffer<true>::write_raw(const void * ptr, size_t len) const {
330-
auto bytes_written = streambuf->sputn(static_cast<const uint8_t *>(ptr), len);
330+
auto bytes_written = streambuf->sputn(static_cast<const char *>(ptr), len);
331331
if (bytes_written != static_cast<std::streamsize>(len)) {
332332
throw std::runtime_error("write beyond end of buffer");
333333
}

src/llama-mmap.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
#include <cstdint>
44
#include <memory>
55
#include <vector>
6-
#include "uint8-buff-stream.h"
76
#include <future>
87
#include <string>
98
#include <map>
@@ -54,7 +53,9 @@ struct llama_file_disk : public llama_file {
5453
};
5554

5655
template <bool Writable> struct llama_file_buffer : public llama_file {
57-
llama_file_buffer(std::unique_ptr<std::basic_streambuf<uint8_t>> && streambuf);
56+
/// @note Use char for the streambuf because not all platforms support a uint8_t specialization (e.g. macOS or newer NDKs);
57+
/// from C++17 onward there are guarantees that make it safe to access binary data through char.
58+
llama_file_buffer(std::unique_ptr<std::basic_streambuf<char>> && streambuf);
5859

5960
~llama_file_buffer() override;
6061

@@ -75,7 +76,7 @@ template <bool Writable> struct llama_file_buffer : public llama_file {
7576
/// @throw std::runtime_error if the buffer is read-only
7677
void write_u32(uint32_t val) const override;
7778

78-
std::unique_ptr<std::basic_streambuf<uint8_t>> streambuf;
79+
std::unique_ptr<std::basic_streambuf<char>> streambuf;
7980
};
8081

8182
template <bool Writable> struct llama_future_file_buffer {

src/llama-model-load-input.cpp

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#include "llama-model-load-input.h"
2+
23
#include <sstream>
4+
35
#include "llama-mmap.h"
46

57
namespace load_input_variant {
@@ -44,17 +46,10 @@ std::optional<std::set<std::string>> parse_tensor_list_from_future(load_input_t
4446
llama_future_file_buffer_ro tensor_file(future_input.tensor_list_file, future_input.context);
4547
std::unique_ptr<llama_file_buffer_ro> file_buffer = tensor_file.extract();
4648

47-
// Read the entire buffer as bytes and convert to string
48-
std::vector<uint8_t> buffer;
49-
std::basic_istream<uint8_t> stream(file_buffer->streambuf.get());
50-
std::istreambuf_iterator<uint8_t> begin(stream), end;
51-
buffer.assign(begin, end);
52-
53-
// Convert bytes to string and split by newlines
54-
std::string content(reinterpret_cast<const char *>(buffer.data()), buffer.size());
55-
std::istringstream line_stream(content);
56-
std::string line;
57-
while (std::getline(line_stream, line)) {
49+
// Read directly from the stream
50+
std::basic_istream<char> stream(file_buffer->streambuf.get());
51+
std::string line;
52+
while (std::getline(stream, line)) {
5853
tensor_names.insert(line);
5954
}
6055

src/llama-model-load-input.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ struct fname_load_input {
1717
};
1818

1919
struct buffer_load_input {
20-
std::unique_ptr<std::basic_streambuf<uint8_t>> & streambuf;
20+
std::unique_ptr<std::basic_streambuf<char>> & streambuf;
2121
};
2222

2323
struct buffer_future_load_input {

0 commit comments

Comments
 (0)