Skip to content

Commit f9d75d7

Browse files
feat(ffi): Add new EncodedTextAst implementation that uses StringBlob to reduce allocation overhead. (#1561)
Co-authored-by: Devin Gibson <[email protected]>
1 parent 0274e6c commit f9d75d7

File tree

8 files changed

+524
-2
lines changed

8 files changed

+524
-2
lines changed

components/core/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,9 +481,13 @@ set(SOURCE_FILES_unitTest
481481
src/clp/EncodedVariableInterpreter.cpp
482482
src/clp/EncodedVariableInterpreter.hpp
483483
src/clp/ErrorCode.hpp
484+
src/clp/ffi/EncodedTextAst.hpp
485+
src/clp/ffi/EncodedTextAstError.cpp
486+
src/clp/ffi/EncodedTextAstError.hpp
484487
src/clp/ffi/encoding_methods.cpp
485488
src/clp/ffi/encoding_methods.hpp
486489
src/clp/ffi/encoding_methods.inc
490+
src/clp/ffi/test/test_EncodedTextAst.cpp
487491
src/clp/ffi/test/test_StringBlob.cpp
488492
src/clp/ffi/ir_stream/byteswap.hpp
489493
src/clp/ffi/ir_stream/Deserializer.hpp
components/core/src/clp/ffi/EncodedTextAst.hpp

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
#ifndef CLP_FFI_ENCODEDTEXTAST_HPP
2+
#define CLP_FFI_ENCODEDTEXTAST_HPP
3+
4+
#include <concepts>
5+
#include <cstddef>
6+
#include <string>
7+
#include <string_view>
8+
#include <utility>
9+
#include <vector>
10+
11+
#include <ystdlib/error_handling/Result.hpp>
12+
13+
#include "../ir/types.hpp"
14+
#include "../type_utils.hpp"
15+
#include "EncodedTextAstError.hpp"
16+
#include "encoding_methods.hpp"
17+
#include "StringBlob.hpp"
18+
#include "type_utils.hpp"
19+
20+
namespace clp::ffi {
21+
/**
22+
* Method signature requirements for handling constant text segments in an encoded text AST.
23+
* @tparam EncodedTextAstConstantHandlerType A callable that can be invoked with a `std::string_view` (the constant text) and whose call expression has type exactly `void`.
24+
*/
25+
template <typename EncodedTextAstConstantHandlerType>
26+
concept EncodedTextAstConstantHandlerReq
27+
= requires(EncodedTextAstConstantHandlerType handler, std::string_view constant) {
28+
{ handler(constant) } -> std::same_as<void>;
29+
};
30+
31+
/**
32+
* Method signature requirements for handling int variables in an encoded text AST.
33+
* @tparam EncodedTextAstIntVarHandlerType A callable that can be invoked with an `encoded_variable_t` and whose call expression has type exactly `void`.
34+
* @tparam encoded_variable_t The type of the encoded variable passed to the handler.
35+
*/
36+
template <typename EncodedTextAstIntVarHandlerType, typename encoded_variable_t>
37+
concept EncodedTextAstIntVarHandlerReq
38+
= requires(EncodedTextAstIntVarHandlerType handler, encoded_variable_t var) {
39+
{ handler(var) } -> std::same_as<void>;
40+
};
41+
42+
/**
43+
* Method signature requirements for handling float variables in an encoded text AST.
44+
* @tparam EncodedTextAstFloatVarHandlerType A callable that can be invoked with an `encoded_variable_t` and whose call expression has type exactly `void`.
45+
* @tparam encoded_variable_t The type of the encoded variable passed to the handler.
46+
*/
47+
template <typename EncodedTextAstFloatVarHandlerType, typename encoded_variable_t>
48+
concept EncodedTextAstFloatVarHandlerReq
49+
= requires(EncodedTextAstFloatVarHandlerType handler, encoded_variable_t var) {
50+
{ handler(var) } -> std::same_as<void>;
51+
};
52+
53+
/**
54+
* Method signature requirements for handling dictionary variables in an encoded text AST.
55+
* @tparam EncodedTextAstDictVarHandlerType A callable that can be invoked with a `std::string_view` (the dictionary variable) and whose call expression has type exactly `void`.
56+
*/
57+
template <typename EncodedTextAstDictVarHandlerType>
58+
concept EncodedTextAstDictVarHandlerReq
59+
= requires(EncodedTextAstDictVarHandlerType handler, std::string_view var) {
60+
{ handler(var) } -> std::same_as<void>;
61+
};
62+
63+
/**
64+
* A parsed and encoded unstructured text string.
65+
* @tparam encoded_variable_t The type of encoded variables in the string.
66+
*/
67+
template <ir::EncodedVariableTypeReq encoded_variable_t>
68+
class EncodedTextAst {
69+
public:
70+
// Factory function
71+
/**
72+
* @param encoded_vars
73+
* @param string_blob A string blob containing a list of dictionary variables followed by a
74+
* logtype.
75+
* @return A result containing the newly created `EncodedTextAst` instance on success, or an
76+
* error code indicating the failure:
77+
* - EncodedTextAstErrorEnum::MissingLogtype: if `string_blob` contains no strings.
78+
*/
79+
[[nodiscard]] static auto
80+
create(std::vector<encoded_variable_t> encoded_vars, StringBlob string_blob)
81+
-> ystdlib::error_handling::Result<EncodedTextAst> {
82+
if (string_blob.get_num_strings() < 1) {
83+
return EncodedTextAstError{EncodedTextAstErrorEnum::MissingLogtype};
84+
}
85+
return EncodedTextAst{std::move(encoded_vars), std::move(string_blob)};
86+
}
87+
88+
// Default copy & move constructors and assignment operators
89+
EncodedTextAst(EncodedTextAst const&) = default;
90+
EncodedTextAst(EncodedTextAst&&) noexcept = default;
91+
auto operator=(EncodedTextAst const&) -> EncodedTextAst& = default;
92+
auto operator=(EncodedTextAst&&) noexcept -> EncodedTextAst& = default;
93+
94+
// Destructor
95+
~EncodedTextAst() = default;
96+
97+
// Methods
98+
[[nodiscard]] auto get_logtype() const -> std::string_view {
99+
return m_string_blob.get_string(m_num_dict_vars).value();
100+
}
101+
102+
/**
103+
* Decodes the encoded text AST into its string form by calling the given handlers for each
104+
* component of the message.
105+
* @tparam unescape_logtype Whether to remove the escape characters from the logtype before
106+
* calling `constant_handler`.
107+
* @param constant_handler
108+
* @param int_var_handler
109+
* @param float_var_handler
110+
* @param dict_var_handler
111+
* @return A void result on success, or an error code indicating the failure:
112+
* - EncodedTextAstErrorEnum::MissingEncodedVar if an encoded variable is missing.
113+
* - EncodedTextAstErrorEnum::MissingDictVar if a dictionary variable is missing.
114+
* - EncodedTextAstErrorEnum::UnexpectedTrailingEscapeCharacter if the logtype ends with an
115+
* unexpected escape character.
116+
*/
117+
template <bool unescape_logtype>
118+
[[nodiscard]] auto decode(
119+
EncodedTextAstConstantHandlerReq auto constant_handler,
120+
EncodedTextAstIntVarHandlerReq<encoded_variable_t> auto int_var_handler,
121+
EncodedTextAstFloatVarHandlerReq<encoded_variable_t> auto float_var_handler,
122+
EncodedTextAstDictVarHandlerReq auto dict_var_handler
123+
) const -> ystdlib::error_handling::Result<void>;
124+
125+
/**
126+
* Decodes and un-parses the encoded text AST into its string form.
127+
* @return A result containing the decoded string on success, or an error code indicating the
128+
* failure:
129+
* - Forwards `decode`'s return values on failure.
130+
*/
131+
[[nodiscard]] auto to_string() const -> ystdlib::error_handling::Result<std::string> {
132+
std::string decoded_string;
133+
YSTDLIB_ERROR_HANDLING_TRYV(
134+
decode<true>(
135+
[&](std::string_view constant) { decoded_string.append(constant); },
136+
[&](encoded_variable_t int_var) {
137+
decoded_string.append(decode_integer_var(int_var));
138+
},
139+
[&](encoded_variable_t float_var) {
140+
decoded_string.append(decode_float_var(float_var));
141+
},
142+
[&](std::string_view dict_var) { decoded_string.append(dict_var); }
143+
)
144+
);
145+
return decoded_string;
146+
}
147+
148+
private:
149+
// Constructor
150+
EncodedTextAst(std::vector<encoded_variable_t> encoded_vars, StringBlob string_blob)
151+
: m_encoded_vars{std::move(encoded_vars)},
152+
m_string_blob{std::move(string_blob)},
153+
m_num_dict_vars{m_string_blob.get_num_strings() - 1} {}
154+
155+
// Variables
156+
std::vector<encoded_variable_t> m_encoded_vars;
157+
StringBlob m_string_blob;
158+
size_t m_num_dict_vars;
159+
};
160+
161+
template <ir::EncodedVariableTypeReq encoded_variable_t>
162+
template <bool unescape_logtype>
163+
[[nodiscard]] auto EncodedTextAst<encoded_variable_t>::decode(
164+
EncodedTextAstConstantHandlerReq auto constant_handler,
165+
EncodedTextAstIntVarHandlerReq<encoded_variable_t> auto int_var_handler,
166+
EncodedTextAstFloatVarHandlerReq<encoded_variable_t> auto float_var_handler,
167+
EncodedTextAstDictVarHandlerReq auto dict_var_handler
168+
) const -> ystdlib::error_handling::Result<void> {
169+
auto const logtype{get_logtype()};
170+
auto const logtype_length = logtype.length();
171+
auto const num_encoded_vars{m_encoded_vars.size()};
172+
173+
size_t next_static_text_begin_pos{0};
174+
size_t dictionary_vars_idx{0};
175+
size_t encoded_vars_idx{0};
176+
177+
for (size_t curr_pos{0}; curr_pos < logtype_length; ++curr_pos) {
178+
auto const c{logtype.at(curr_pos)};
179+
switch (c) {
180+
case enum_to_underlying_type(ir::VariablePlaceholder::Float): {
181+
constant_handler(logtype.substr(
182+
next_static_text_begin_pos,
183+
curr_pos - next_static_text_begin_pos
184+
));
185+
next_static_text_begin_pos = curr_pos + 1;
186+
if (encoded_vars_idx >= num_encoded_vars) {
187+
return EncodedTextAstError{EncodedTextAstErrorEnum::MissingEncodedVar};
188+
}
189+
float_var_handler(m_encoded_vars.at(encoded_vars_idx));
190+
++encoded_vars_idx;
191+
break;
192+
}
193+
194+
case enum_to_underlying_type(ir::VariablePlaceholder::Integer): {
195+
constant_handler(logtype.substr(
196+
next_static_text_begin_pos,
197+
curr_pos - next_static_text_begin_pos
198+
));
199+
next_static_text_begin_pos = curr_pos + 1;
200+
if (encoded_vars_idx >= num_encoded_vars) {
201+
return EncodedTextAstError{EncodedTextAstErrorEnum::MissingEncodedVar};
202+
}
203+
int_var_handler(m_encoded_vars.at(encoded_vars_idx));
204+
++encoded_vars_idx;
205+
break;
206+
}
207+
208+
case enum_to_underlying_type(ir::VariablePlaceholder::Dictionary): {
209+
constant_handler(logtype.substr(
210+
next_static_text_begin_pos,
211+
curr_pos - next_static_text_begin_pos
212+
));
213+
next_static_text_begin_pos = curr_pos + 1;
214+
if (dictionary_vars_idx >= m_num_dict_vars) {
215+
return EncodedTextAstError{EncodedTextAstErrorEnum::MissingDictVar};
216+
}
217+
dict_var_handler(m_string_blob.get_string(dictionary_vars_idx).value());
218+
++dictionary_vars_idx;
219+
break;
220+
}
221+
222+
case enum_to_underlying_type(ir::VariablePlaceholder::Escape): {
223+
// Ensure the escape character is followed by a character that's being escaped
224+
if (curr_pos == logtype_length - 1) {
225+
return EncodedTextAstError{
226+
EncodedTextAstErrorEnum::UnexpectedTrailingEscapeCharacter
227+
};
228+
}
229+
230+
if constexpr (unescape_logtype) {
231+
constant_handler(logtype.substr(
232+
next_static_text_begin_pos,
233+
curr_pos - next_static_text_begin_pos
234+
));
235+
// Skip the escape character
236+
next_static_text_begin_pos = curr_pos + 1;
237+
}
238+
239+
// The character after the escape character is static text (regardless of whether it
240+
// is a variable placeholder), so increment curr_pos by 1 to ensure we don't process
241+
// the next character in any of the other cases (instead it will be added to the
242+
// message).
243+
++curr_pos;
244+
break;
245+
}
246+
247+
default:
248+
// Regular characters. Do nothing.
249+
continue;
250+
}
251+
}
252+
253+
// Add remainder
254+
if (next_static_text_begin_pos < logtype_length) {
255+
constant_handler(logtype.substr(
256+
next_static_text_begin_pos,
257+
logtype_length - next_static_text_begin_pos
258+
));
259+
}
260+
261+
return ystdlib::error_handling::success();
262+
}
263+
} // namespace clp::ffi
264+
265+
#endif // CLP_FFI_ENCODEDTEXTAST_HPP
components/core/src/clp/ffi/EncodedTextAstError.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#include "EncodedTextAstError.hpp"
2+
3+
#include <string>
4+
5+
#include <ystdlib/error_handling/ErrorCode.hpp>
6+
7+
using clp::ffi::EncodedTextAstErrorEnum;
8+
using EncodedTextAstErrorCategory = ystdlib::error_handling::ErrorCategory<EncodedTextAstErrorEnum>;
9+
10+
template <>
11+
auto EncodedTextAstErrorCategory::name() const noexcept -> char const* {
12+
return "clp::ffi::EncodedTextAstErrorCode";
13+
}
14+
15+
template <>
16+
auto EncodedTextAstErrorCategory::message(EncodedTextAstErrorEnum error_enum) const -> std::string {
17+
switch (error_enum) {
18+
case EncodedTextAstErrorEnum::MissingEncodedVar:
19+
return "An encoded variable is missing from the `EncodedTextAst`";
20+
case EncodedTextAstErrorEnum::MissingDictVar:
21+
return "A dictionary variable is missing from the `EncodedTextAst`";
22+
case EncodedTextAstErrorEnum::MissingLogtype:
23+
return "The logtype is missing from the `EncodedTextAst`";
24+
case EncodedTextAstErrorEnum::UnexpectedTrailingEscapeCharacter:
25+
return "Unexpected escape character without escaped value at the end of the logtype";
26+
default:
27+
return "Unknown error code enum";
28+
}
29+
}
components/core/src/clp/ffi/EncodedTextAstError.hpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#ifndef CLP_FFI_ENCODEDTEXTASTERROR_HPP
2+
#define CLP_FFI_ENCODEDTEXTASTERROR_HPP
3+
4+
#include <cstdint>
5+
6+
#include <ystdlib/error_handling/ErrorCode.hpp>
7+
8+
namespace clp::ffi {
9+
/**
 * Error enums for `EncodedTextAst`.
 */
enum class EncodedTextAstErrorEnum : uint8_t {
    // A dictionary variable placeholder in the logtype has no matching dictionary variable.
    MissingDictVar = 1,
    // An integer or float variable placeholder in the logtype has no matching encoded variable.
    MissingEncodedVar = 2,
    // The string blob used to create the `EncodedTextAst` contains no strings.
    MissingLogtype = 3,
    // The logtype ends with an escape character that has nothing left to escape.
    UnexpectedTrailingEscapeCharacter = 4,
};
18+
19+
using EncodedTextAstError = ystdlib::error_handling::ErrorCode<EncodedTextAstErrorEnum>;
20+
} // namespace clp::ffi
21+
22+
YSTDLIB_ERROR_HANDLING_MARK_AS_ERROR_CODE_ENUM(clp::ffi::EncodedTextAstErrorEnum);
23+
24+
#endif // CLP_FFI_ENCODEDTEXTASTERROR_HPP

components/core/src/clp/ffi/StringBlob.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,17 @@ class StringBlob {
5757
return std::nullopt;
5858
}
5959

60+
/**
61+
* Appends a string to the end of the blob.
62+
* @param str
63+
*/
64+
auto append(std::string_view str) -> void {
65+
auto const start_offset{m_data.size()};
66+
auto const end_offset{start_offset + str.length()};
67+
m_data.append(str);
68+
m_offsets.emplace_back(end_offset);
69+
}
70+
6071
private:
6172
std::string m_data;
6273
std::vector<size_t> m_offsets{0};

0 commit comments

Comments
 (0)