Skip to content

Commit 7491b01

Browse files
committed
[common] Stream buffer for uint8 data
- Ensures a std::char_traits implementation for uint8_t exists that can be used with std::basic_streambuf. - Adds an implementation of std::basic_streambuf backed by a single vector. Will be used by llama.cpp and tests when loading from a single memory buffer.
1 parent 67e1868 commit 7491b01

File tree

3 files changed

+155
-0
lines changed

3 files changed

+155
-0
lines changed

src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ add_library(llama
3535
unicode-data.cpp
3636
unicode.cpp
3737
unicode.h
38+
uint8-buff-stream.cpp
3839
)
3940

4041
target_include_directories(llama PRIVATE .)

src/uint8-buff-stream.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#include "uint8-buff-stream.h"
2+
3+
/// @brief Takes ownership of the bytes in @p _data and publishes them as the stream buffer's get area.
/// @param _data Byte vector to move from; its contents become the readable range of this buffer.
Uint8BufferStreamBuf::Uint8BufferStreamBuf(std::vector<uint8_t> && _data) : data(std::move(_data)) {
    // The get area spans the whole owned vector, with the read position at its first byte.
    char_type * const first = const_cast<uint8_t *>(data.data());
    char_type * const last  = first + data.size();
    setg(first, first, last);
}
7+
8+
/// @brief Reports the byte at the current read position without consuming it.
/// @return The current byte converted with traits_type::to_int_type, or traits_type::eof() when the
///         read position has reached the end of the get area.
Uint8BufferStreamBuf::int_type Uint8BufferStreamBuf::underflow() {
    // The whole buffer is already exposed as the get area, so there is nothing to refill.
    if (!(gptr() < egptr())) {
        return traits_type::eof();
    }
    return traits_type::to_int_type(*gptr());
}
14+
15+
/// @brief Bulk read: copies up to @p n bytes from the get area into @p s and advances the read position.
/// @param s Destination buffer; must have room for at least @p n bytes.
/// @param n Maximum number of bytes to copy. Zero or negative values copy nothing.
/// @return Number of bytes actually copied; 0 when the get area is exhausted or @p n is not positive.
std::streamsize Uint8BufferStreamBuf::xsgetn(char_type * s, std::streamsize n) {
    const std::streamsize available = egptr() - gptr();
    // Clamp by hand rather than with std::min: the header does not include <algorithm>, and a
    // negative request must produce 0 instead of being handed back to the caller as a negative count.
    const std::streamsize to_read = n < available ? n : available;
    if (to_read <= 0) {
        return 0;
    }
    std::memcpy(s, gptr(), static_cast<std::size_t>(to_read));
    // setg (not gbump) is used to advance: gbump takes an int, which is too narrow for large buffers.
    setg(eback(), gptr() + to_read, egptr());
    return to_read;
}
24+
25+
/// @brief Moves the read position by @p off relative to the start, current position or end of the buffer.
/// @param off   Signed byte offset applied to the anchor selected by @p dir.
/// @param dir   std::ios_base::beg, std::ios_base::cur or std::ios_base::end.
/// @param which Must include std::ios_base::in; this buffer has no put area.
/// @return The new absolute read position, or pos_type(off_type(-1)) if @p which lacks `in`, @p dir is
///         not one of the three anchors, or the target lies outside [0, size]. On failure the read
///         position is left unchanged.
Uint8BufferStreamBuf::pos_type Uint8BufferStreamBuf::seekoff(off_type off, std::ios_base::seekdir dir,
                                                             std::ios_base::openmode which) {
    const pos_type failure = pos_type(off_type(-1));
    if (!(which & std::ios_base::in)) {
        return failure;
    }

    const off_type size = static_cast<off_type>(egptr() - eback());

    // Resolve the anchor as an offset from the start of the buffer; it always lies in [0, size].
    off_type anchor = 0;
    if (dir == std::ios_base::beg) {
        anchor = 0;
    } else if (dir == std::ios_base::cur) {
        anchor = static_cast<off_type>(gptr() - eback());
    } else if (dir == std::ios_base::end) {
        anchor = size;
    } else {
        return failure;
    }

    // Validate in integer space BEFORE forming a pointer: computing eback() + off for an out-of-range
    // offset is undefined behaviour and could wrap past the bounds check. Because anchor is in
    // [0, size], neither -anchor nor size - anchor can overflow.
    if (off < -anchor || off > size - anchor) {
        return failure;
    }

    const off_type target = anchor + off;
    setg(eback(), eback() + target, egptr());
    return pos_type(target);
}
44+
45+
/// @brief Moves the read position to the absolute position @p pos (counted from the start of the buffer).
/// @param pos   Target position; valid values are 0 through the buffer size inclusive.
/// @param which Must include std::ios_base::in; this buffer has no put area.
/// @return @p pos on success, or pos_type(off_type(-1)) if @p which lacks `in` or @p pos is out of
///         range. On failure the read position is left unchanged.
Uint8BufferStreamBuf::pos_type Uint8BufferStreamBuf::seekpos(pos_type pos, std::ios_base::openmode which) {
    const pos_type failure = pos_type(off_type(-1));
    if (!(which & std::ios_base::in)) {
        return failure;
    }

    // Range-check the offset before doing pointer arithmetic: eback() + pos for an out-of-range pos is
    // undefined behaviour and could wrap past a pointer-based bounds check.
    const off_type target = off_type(pos);
    const off_type size   = static_cast<off_type>(egptr() - eback());
    if (target < 0 || target > size) {
        return failure;
    }

    setg(eback(), eback() + target, egptr());
    return pos;
}

src/uint8-buff-stream.h

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
#pragma once
2+
3+
#include <cstdint>
4+
#include <cstring>
5+
#include <iostream>
6+
#include <streambuf>
7+
#include <vector>
8+
9+
/// @brief Character traits for uint8_t, so that char_traits-based std templates
///        (e.g. std::basic_streambuf) can be instantiated with uint8_t as the character type.
///
/// int_type is a plain int: byte values map to 0..255 and eof() is -1, so every byte is
/// distinguishable from end-of-file.
template <> struct std::char_traits<uint8_t> {
    using char_type  = uint8_t;
    using int_type   = int;
    using off_type   = std::streamoff;
    using pos_type   = std::streampos;
    using state_type = std::mbstate_t;

    /// @brief Assigns a single character: dst = src.
    static void assign(char_type & c1, const char_type & c2) noexcept { c1 = c2; }

    /// @brief Equality of two characters.
    static constexpr bool eq(char_type a, char_type b) noexcept { return a == b; }

    /// @brief Strict ordering of two characters by their unsigned value.
    static constexpr bool lt(char_type a, char_type b) noexcept { return a < b; }

    /// @brief Lexicographically compares the first @p n characters of @p s1 and @p s2.
    /// @return -1 if s1 orders first, 1 if s2 orders first, 0 if the ranges are equal.
    static int compare(const char_type * s1, const char_type * s2, std::size_t n) {
        for (; n != 0; --n, ++s1, ++s2) {
            if (lt(*s1, *s2)) {
                return -1;
            }
            if (lt(*s2, *s1)) {
                return 1;
            }
        }
        return 0;
    }

    /// @brief Counts characters in @p s up to, but not including, the first zero-valued character.
    static std::size_t length(const char_type * s) {
        const char_type * cursor = s;
        while (!eq(*cursor, char_type())) {
            ++cursor;
        }
        return static_cast<std::size_t>(cursor - s);
    }

    /// @brief Searches the first @p n characters of @p s for @p c.
    /// @return Pointer to the first match, or nullptr when @p c does not occur.
    static const char_type * find(const char_type * s, std::size_t n, const char_type & c) {
        for (const char_type * const last = s + n; s != last; ++s) {
            if (eq(*s, c)) {
                return s;
            }
        }
        return nullptr;
    }

    /// @brief Copies @p n characters from @p s2 to @p s1; the ranges may overlap.
    /// @return @p s1.
    static char_type * move(char_type * s1, const char_type * s2, std::size_t n) {
        void * const dst = std::memmove(s1, s2, n);
        return static_cast<char_type *>(dst);
    }

    /// @brief Copies @p n characters from @p s2 to @p s1 using std::memcpy.
    /// @return @p s1.
    static char_type * copy(char_type * s1, const char_type * s2, std::size_t n) {
        void * const dst = std::memcpy(s1, s2, n);
        return static_cast<char_type *>(dst);
    }

    /// @brief Fills the first @p n characters of @p s with @p c.
    /// @return @p s.
    static char_type * assign(char_type * s, std::size_t n, char_type c) {
        char_type * out = s;
        while (n-- != 0) {
            *out++ = c;
        }
        return s;
    }

    /// @brief Maps eof() to 0 and returns every other value unchanged.
    static constexpr int_type not_eof(int_type c) noexcept { return !eq_int_type(c, eof()) ? c : 0; }

    /// @brief Converts an int_type to a character; values outside 0..255 yield char_type().
    static constexpr char_type to_char_type(int_type c) noexcept {
        return (c < 0 || c > 255) ? char_type() : static_cast<char_type>(c);
    }

    /// @brief Widens a character to int_type (result is in 0..255).
    static constexpr int_type to_int_type(char_type c) noexcept { return static_cast<int_type>(c); }

    /// @brief Equality of two int_type values.
    static constexpr bool eq_int_type(int_type c1, int_type c2) noexcept { return c1 == c2; }

    /// @brief End-of-file marker: -1, which no to_int_type result can equal.
    static constexpr int_type eof() noexcept { return static_cast<int_type>(-1); }
};
79+
80+
/// @brief Read-only std::basic_streambuf<uint8_t> over a byte vector that the buffer owns.
///
/// The constructor moves the vector in and exposes its full contents as the get area; reads and
/// seeks then operate directly on that memory.
class Uint8BufferStreamBuf : public std::basic_streambuf<uint8_t> {
  public:
    /// @brief Takes ownership of @p _data and makes its bytes the readable range of this buffer.
    Uint8BufferStreamBuf(std::vector<uint8_t> && _data);

    // Copying is disabled: the base class copy would duplicate the get-area pointers, leaving the
    // copy reading from the source object's vector (and dangling once the source is destroyed).
    Uint8BufferStreamBuf(const Uint8BufferStreamBuf &)             = delete;
    Uint8BufferStreamBuf & operator=(const Uint8BufferStreamBuf &) = delete;

    // Moving stays available: moving the vector hands over the very buffer the get-area pointers
    // refer to, so the destination remains consistent. The moved-from object must not be read from.
    Uint8BufferStreamBuf(Uint8BufferStreamBuf &&)             = default;
    Uint8BufferStreamBuf & operator=(Uint8BufferStreamBuf &&) = default;

  protected:
    /// @brief Returns the byte at the read position without consuming it, or eof() at the end.
    int_type underflow() override;

    /// @brief Efficient bulk reading. The standard implementation specifies that this function can be overridden
    /// to provide a more efficient implementation: sgetn will call this function if it is overridden.
    std::streamsize xsgetn(char_type * s, std::streamsize n) override;

    /// @brief Relative seek of the read position; only std::ios_base::in is supported.
    pos_type seekoff(off_type off, std::ios_base::seekdir dir,
                     std::ios_base::openmode which = std::ios_base::in) override;

    /// @brief Absolute seek of the read position; only std::ios_base::in is supported.
    pos_type seekpos(pos_type pos, std::ios_base::openmode which = std::ios_base::in) override;

  private:
    std::vector<uint8_t> data;  ///< Owned storage backing the get area.
};

0 commit comments

Comments
 (0)