Skip to content

Commit f798f1f

Browse files
feat(kv-ir): Add StringBlob for compact storage of multiple strings in a contiguous byte buffer. (#1544)
Co-authored-by: Devin Gibson <[email protected]>
1 parent 6b04fa5 commit f798f1f

File tree

3 files changed

+113
-0
lines changed

3 files changed

+113
-0
lines changed

components/core/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,7 @@ set(SOURCE_FILES_unitTest
484484
src/clp/ffi/encoding_methods.cpp
485485
src/clp/ffi/encoding_methods.hpp
486486
src/clp/ffi/encoding_methods.inc
487+
src/clp/ffi/test/test_StringBlob.cpp
487488
src/clp/ffi/ir_stream/byteswap.hpp
488489
src/clp/ffi/ir_stream/Deserializer.hpp
489490
src/clp/ffi/ir_stream/decoding_methods.cpp
@@ -535,6 +536,7 @@ set(SOURCE_FILES_unitTest
535536
src/clp/ffi/search/Subquery.hpp
536537
src/clp/ffi/search/WildcardToken.cpp
537538
src/clp/ffi/search/WildcardToken.hpp
539+
src/clp/ffi/StringBlob.hpp
538540
src/clp/ffi/utils.cpp
539541
src/clp/ffi/utils.hpp
540542
src/clp/ffi/Value.hpp
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#ifndef CLP_FFI_STRINGBLOB_HPP
2+
#define CLP_FFI_STRINGBLOB_HPP
3+
4+
#include <cstddef>
5+
#include <optional>
6+
#include <string>
7+
#include <string_view>
8+
#include <vector>
9+
10+
#include "../ErrorCode.hpp"
11+
#include "../ReaderInterface.hpp"
12+
13+
namespace clp::ffi {
14+
// Stores a list of strings as an indexable blob.
15+
class StringBlob {
16+
public:
17+
// Constructors
18+
StringBlob() = default;
19+
20+
// Methods
21+
[[nodiscard]] auto get_num_strings() const -> size_t { return m_offsets.size() - 1; }
22+
23+
/**
24+
* @param index
25+
* @return A view of the string at the given `index` in the blob.
26+
* @return std::nullopt if `index` is out of bounds.
27+
*/
28+
[[nodiscard]] auto get_string(size_t index) const -> std::optional<std::string_view> {
29+
if (index >= get_num_strings()) {
30+
return std::nullopt;
31+
}
32+
size_t const start_offset{m_offsets[index]};
33+
size_t const end_offset{m_offsets[index + 1]};
34+
return std::string_view{m_data}.substr(start_offset, end_offset - start_offset);
35+
}
36+
37+
/**
38+
* Reads a string of the given `length` from the `reader` and appends it to the blob.
39+
* @param reader
40+
* @param length The exact length of the string to read.
41+
* @return std::nullopt on success.
42+
* @return Forwards `ReaderInterface::try_read_exact_length`'s error code on failure.
43+
*/
44+
[[nodiscard]] auto read_from(ReaderInterface& reader, size_t length)
45+
-> std::optional<ErrorCode> {
46+
auto const start_offset{m_data.size()};
47+
auto const end_offset{start_offset + length};
48+
m_data.resize(static_cast<std::string::size_type>(end_offset));
49+
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
50+
if (auto const err{reader.try_read_exact_length(m_data.data() + start_offset, length)};
51+
ErrorCode::ErrorCode_Success != err)
52+
{
53+
m_data.resize(start_offset);
54+
return err;
55+
}
56+
m_offsets.emplace_back(end_offset);
57+
return std::nullopt;
58+
}
59+
60+
private:
61+
std::string m_data;
62+
std::vector<size_t> m_offsets{0};
63+
};
64+
} // namespace clp::ffi
65+
66+
#endif // CLP_FFI_STRINGBLOB_HPP
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#include <cstddef>
2+
#include <string>
3+
#include <string_view>
4+
#include <vector>
5+
6+
#include <catch2/catch_test_macros.hpp>
7+
8+
#include "../../BufferReader.hpp"
9+
#include "../../ErrorCode.hpp"
10+
#include "../StringBlob.hpp"
11+
12+
// Verifies that `StringBlob` appends strings read from a reader, exposes them by index, rejects
// out-of-bounds indices, and is left unchanged by a failed read.
TEST_CASE("StringBlob basic functionality", "[StringBlob]") {
    clp::ffi::StringBlob string_blob;

    std::vector<std::string> const test_strings{
            "Hello, World!",
            "This is a test string.",
            "StringBlob is working correctly.",
    };

    // The reader serves the test strings back-to-back with no delimiters; the blob is told each
    // string's length explicitly.
    std::string buffer;
    for (auto const& str : test_strings) {
        buffer += str;
    }
    clp::BufferReader reader{buffer.data(), buffer.size()};

    // An empty blob contains no strings, so every index is out of bounds.
    REQUIRE((0 == string_blob.get_num_strings()));
    REQUIRE_FALSE(string_blob.get_string(0).has_value());

    size_t expected_num_strings{0};
    for (auto const& expected_str : test_strings) {
        REQUIRE((expected_num_strings == string_blob.get_num_strings()));

        auto const result{string_blob.read_from(reader, expected_str.size())};
        REQUIRE_FALSE(result.has_value());
        ++expected_num_strings;
        REQUIRE((expected_num_strings == string_blob.get_num_strings()));
        auto const optional_retrieved_str{string_blob.get_string(expected_num_strings - 1)};
        REQUIRE((optional_retrieved_str.has_value()));
        // NOLINTNEXTLINE(bugprone-unchecked-optional-access)
        REQUIRE((optional_retrieved_str.value() == expected_str));
    }

    // Strings appended earlier must still be retrievable after later appends grew the buffer.
    for (size_t idx{0}; idx < test_strings.size(); ++idx) {
        auto const optional_str{string_blob.get_string(idx)};
        REQUIRE((optional_str.has_value()));
        // NOLINTNEXTLINE(bugprone-unchecked-optional-access)
        REQUIRE((optional_str.value() == test_strings[idx]));
    }
    REQUIRE_FALSE(string_blob.get_string(test_strings.size()).has_value());

    // The reader is exhausted, so any further read must fail with end-of-file.
    auto const read_from_eof{string_blob.read_from(reader, 1)};
    REQUIRE(read_from_eof.has_value());
    // NOLINTNEXTLINE(bugprone-unchecked-optional-access)
    REQUIRE((clp::ErrorCode::ErrorCode_EndOfFile == read_from_eof.value()));

    // A failed read must not add a string to the blob.
    REQUIRE((test_strings.size() == string_blob.get_num_strings()));
    REQUIRE_FALSE(string_blob.get_string(test_strings.size()).has_value());
}

0 commit comments

Comments
 (0)