diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index 7b5a4ff4c8..1be2fb3363 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -481,9 +481,13 @@ set(SOURCE_FILES_unitTest src/clp/EncodedVariableInterpreter.cpp src/clp/EncodedVariableInterpreter.hpp src/clp/ErrorCode.hpp + src/clp/ffi/EncodedTextAst.hpp + src/clp/ffi/EncodedTextAstError.cpp + src/clp/ffi/EncodedTextAstError.hpp src/clp/ffi/encoding_methods.cpp src/clp/ffi/encoding_methods.hpp src/clp/ffi/encoding_methods.inc + src/clp/ffi/test/test_EncodedTextAst.cpp src/clp/ffi/test/test_StringBlob.cpp src/clp/ffi/ir_stream/byteswap.hpp src/clp/ffi/ir_stream/Deserializer.hpp diff --git a/components/core/src/clp/ffi/EncodedTextAst.hpp b/components/core/src/clp/ffi/EncodedTextAst.hpp new file mode 100644 index 0000000000..40f81de41c --- /dev/null +++ b/components/core/src/clp/ffi/EncodedTextAst.hpp @@ -0,0 +1,265 @@ +#ifndef CLP_FFI_ENCODEDTEXTAST_HPP +#define CLP_FFI_ENCODEDTEXTAST_HPP + +#include +#include +#include +#include +#include +#include + +#include + +#include "../ir/types.hpp" +#include "../type_utils.hpp" +#include "EncodedTextAstError.hpp" +#include "encoding_methods.hpp" +#include "StringBlob.hpp" +#include "type_utils.hpp" + +namespace clp::ffi { +/** + * Method signature requirements for handling constant text segments in an encoded text AST. + * @tparam EncodedTextAstConstantHandlerType + */ +template +concept EncodedTextAstConstantHandlerReq + = requires(EncodedTextAstConstantHandlerType handler, std::string_view constant) { + { handler(constant) } -> std::same_as; + }; + +/** + * Method signature requirements for handling int variables in an encoded text AST. + * @tparam EncodedTextAstIntVarHandlerType + * @tparam encoded_variable_t + */ +template +concept EncodedTextAstIntVarHandlerReq + = requires(EncodedTextAstIntVarHandlerType handler, encoded_variable_t var) { + { handler(var) } -> std::same_as; + }; + +/** + * Method signature requirements for handling float variables in an encoded text AST. + * @tparam EncodedTextAstFloatVarHandlerType + * @tparam encoded_variable_t + */ +template +concept EncodedTextAstFloatVarHandlerReq + = requires(EncodedTextAstFloatVarHandlerType handler, encoded_variable_t var) { + { handler(var) } -> std::same_as; + }; + +/** + * Method signature requirements for handling dictionary variables in an encoded text AST. + * @tparam EncodedTextAstDictVarHandlerType + */ +template +concept EncodedTextAstDictVarHandlerReq + = requires(EncodedTextAstDictVarHandlerType handler, std::string_view var) { + { handler(var) } -> std::same_as; + }; + +/** + * A parsed and encoded unstructured text string. + * @tparam encoded_variable_t The type of encoded variables in the string. + */ +template +class EncodedTextAst { +public: + // Factory function + /** + * @param encoded_vars + * @param string_blob A string blob containing a list of dictionary variables followed by a + * logtype. + * @return A result containing the newly created `EncodedTextAst` instance on success, or an + * error code indicating the failure: + * - EncodedTextAstErrorEnum::MissingLogtype: if `string_blob` contains no strings. + */ + [[nodiscard]] static auto + create(std::vector encoded_vars, StringBlob string_blob) + -> ystdlib::error_handling::Result { + if (string_blob.get_num_strings() < 1) { + return EncodedTextAstError{EncodedTextAstErrorEnum::MissingLogtype}; + } + return EncodedTextAst{std::move(encoded_vars), std::move(string_blob)}; + } + + // Default copy & move constructors and assignment operators + EncodedTextAst(EncodedTextAst const&) = default; + EncodedTextAst(EncodedTextAst&&) noexcept = default; + auto operator=(EncodedTextAst const&) -> EncodedTextAst& = default; + auto operator=(EncodedTextAst&&) noexcept -> EncodedTextAst& = default; + + // Destructor + ~EncodedTextAst() = default; + + // Methods + [[nodiscard]] auto get_logtype() const -> std::string_view { + return m_string_blob.get_string(m_num_dict_vars).value(); + } + + /** + * Decodes the encoded text AST into its string form by calling the given handlers for each + * component of the message. + * @tparam unescape_logtype Whether to remove the escape characters from the logtype before + * calling `constant_handler`. + * @param constant_handler + * @param int_var_handler + * @param float_var_handler + * @param dict_var_handler + * @return A void result on success, or an error code indicating the failure: + * - EncodedTextAstErrorEnum::MissingEncodedVar if an encoded variable is missing. + * - EncodedTextAstErrorEnum::MissingDictVar if a dictionary variable is missing. + * - EncodedTextAstErrorEnum::UnexpectedTrailingEscapeCharacter if the logtype ends with an + * unexpected escape character. + */ + template + [[nodiscard]] auto decode( + EncodedTextAstConstantHandlerReq auto constant_handler, + EncodedTextAstIntVarHandlerReq auto int_var_handler, + EncodedTextAstFloatVarHandlerReq auto float_var_handler, + EncodedTextAstDictVarHandlerReq auto dict_var_handler + ) const -> ystdlib::error_handling::Result; + + /** + * Decodes and un-parses the encoded text AST into its string form. + * @return A result containing the decoded string on success, or an error code indicating the + * failure: + * - Forwards `decode`'s return values on failure. + */ + [[nodiscard]] auto to_string() const -> ystdlib::error_handling::Result { + std::string decoded_string; + YSTDLIB_ERROR_HANDLING_TRYV( + decode( + [&](std::string_view constant) { decoded_string.append(constant); }, + [&](encoded_variable_t int_var) { + decoded_string.append(decode_integer_var(int_var)); + }, + [&](encoded_variable_t float_var) { + decoded_string.append(decode_float_var(float_var)); + }, + [&](std::string_view dict_var) { decoded_string.append(dict_var); } + ) + ); + return decoded_string; + } + +private: + // Constructor + EncodedTextAst(std::vector encoded_vars, StringBlob string_blob) + : m_encoded_vars{std::move(encoded_vars)}, + m_string_blob{std::move(string_blob)}, + m_num_dict_vars{m_string_blob.get_num_strings() - 1} {} + + // Variables + std::vector m_encoded_vars; + StringBlob m_string_blob; + size_t m_num_dict_vars; +}; + +template +template +[[nodiscard]] auto EncodedTextAst::decode( + EncodedTextAstConstantHandlerReq auto constant_handler, + EncodedTextAstIntVarHandlerReq auto int_var_handler, + EncodedTextAstFloatVarHandlerReq auto float_var_handler, + EncodedTextAstDictVarHandlerReq auto dict_var_handler +) const -> ystdlib::error_handling::Result { + auto const logtype{get_logtype()}; + auto const logtype_length = logtype.length(); + auto const num_encoded_vars{m_encoded_vars.size()}; + + size_t next_static_text_begin_pos{0}; + size_t dictionary_vars_idx{0}; + size_t encoded_vars_idx{0}; + + for (size_t curr_pos{0}; curr_pos < logtype_length; ++curr_pos) { + auto const c{logtype.at(curr_pos)}; + switch (c) { + case enum_to_underlying_type(ir::VariablePlaceholder::Float): { + constant_handler(logtype.substr( + next_static_text_begin_pos, + curr_pos - next_static_text_begin_pos + )); + next_static_text_begin_pos = curr_pos + 1; + if (encoded_vars_idx >= num_encoded_vars) { + return EncodedTextAstError{EncodedTextAstErrorEnum::MissingEncodedVar}; + } + float_var_handler(m_encoded_vars.at(encoded_vars_idx)); + ++encoded_vars_idx; + break; + } + + case enum_to_underlying_type(ir::VariablePlaceholder::Integer): { + constant_handler(logtype.substr( + next_static_text_begin_pos, + curr_pos - next_static_text_begin_pos + )); + next_static_text_begin_pos = curr_pos + 1; + if (encoded_vars_idx >= num_encoded_vars) { + return EncodedTextAstError{EncodedTextAstErrorEnum::MissingEncodedVar}; + } + int_var_handler(m_encoded_vars.at(encoded_vars_idx)); + ++encoded_vars_idx; + break; + } + + case enum_to_underlying_type(ir::VariablePlaceholder::Dictionary): { + constant_handler(logtype.substr( + next_static_text_begin_pos, + curr_pos - next_static_text_begin_pos + )); + next_static_text_begin_pos = curr_pos + 1; + if (dictionary_vars_idx >= m_num_dict_vars) { + return EncodedTextAstError{EncodedTextAstErrorEnum::MissingDictVar}; + } + dict_var_handler(m_string_blob.get_string(dictionary_vars_idx).value()); + ++dictionary_vars_idx; + break; + } + + case enum_to_underlying_type(ir::VariablePlaceholder::Escape): { + // Ensure the escape character is followed by a character that's being escaped + if (curr_pos == logtype_length - 1) { + return EncodedTextAstError{ + EncodedTextAstErrorEnum::UnexpectedTrailingEscapeCharacter + }; + } + + if constexpr (unescape_logtype) { + constant_handler(logtype.substr( + next_static_text_begin_pos, + curr_pos - next_static_text_begin_pos + )); + // Skip the escape character + next_static_text_begin_pos = curr_pos + 1; + } + + // The character after the escape character is static text (regardless of whether it + // is a variable placeholder), so increment curr_pos by 1 to ensure we don't process + // the next character in any of the other cases (instead it will be added to the + // message). + ++curr_pos; + break; + } + + default: + // Regular characters. Do nothing. + continue; + } + } + + // Add remainder + if (next_static_text_begin_pos < logtype_length) { + constant_handler(logtype.substr( + next_static_text_begin_pos, + logtype_length - next_static_text_begin_pos + )); + } + + return ystdlib::error_handling::success(); +} +} // namespace clp::ffi + +#endif // CLP_FFI_ENCODEDTEXTAST_HPP diff --git a/components/core/src/clp/ffi/EncodedTextAstError.cpp b/components/core/src/clp/ffi/EncodedTextAstError.cpp new file mode 100644 index 0000000000..5c086b499a --- /dev/null +++ b/components/core/src/clp/ffi/EncodedTextAstError.cpp @@ -0,0 +1,29 @@ +#include "EncodedTextAstError.hpp" + +#include + +#include + +using clp::ffi::EncodedTextAstErrorEnum; +using EncodedTextAstErrorCategory = ystdlib::error_handling::ErrorCategory; + +template <> +auto EncodedTextAstErrorCategory::name() const noexcept -> char const* { + return "clp::ffi::EncodedTextAstErrorCode"; +} + +template <> +auto EncodedTextAstErrorCategory::message(EncodedTextAstErrorEnum error_enum) const -> std::string { + switch (error_enum) { + case EncodedTextAstErrorEnum::MissingEncodedVar: + return "An encoded variable is missing from the `EncodedTextAst`"; + case EncodedTextAstErrorEnum::MissingDictVar: + return "A dictionary variable is missing from the `EncodedTextAst`"; + case EncodedTextAstErrorEnum::MissingLogtype: + return "The logtype is missing from the `EncodedTextAst`"; + case EncodedTextAstErrorEnum::UnexpectedTrailingEscapeCharacter: + return "Unexpected escape character without escaped value at the end of the logtype"; + default: + return "Unknown error code enum"; + } +} diff --git a/components/core/src/clp/ffi/EncodedTextAstError.hpp b/components/core/src/clp/ffi/EncodedTextAstError.hpp new file mode 100644 index 0000000000..52f0d04066 --- /dev/null +++ b/components/core/src/clp/ffi/EncodedTextAstError.hpp @@ -0,0 +1,24 @@ +#ifndef CLP_FFI_ENCODEDTEXTASTERROR_HPP +#define CLP_FFI_ENCODEDTEXTASTERROR_HPP + +#include + +#include + +namespace clp::ffi { +/** + * Error enums for `EncodedTextAst`. + */ +enum class EncodedTextAstErrorEnum : uint8_t { + MissingDictVar = 1, + MissingEncodedVar, + MissingLogtype, + UnexpectedTrailingEscapeCharacter, +}; + +using EncodedTextAstError = ystdlib::error_handling::ErrorCode; +} // namespace clp::ffi + +YSTDLIB_ERROR_HANDLING_MARK_AS_ERROR_CODE_ENUM(clp::ffi::EncodedTextAstErrorEnum); + +#endif // CLP_FFI_ENCODEDTEXTASTERROR_HPP diff --git a/components/core/src/clp/ffi/StringBlob.hpp b/components/core/src/clp/ffi/StringBlob.hpp index e09e1e0fcb..8dc4e68ee7 100644 --- a/components/core/src/clp/ffi/StringBlob.hpp +++ b/components/core/src/clp/ffi/StringBlob.hpp @@ -57,6 +57,17 @@ class StringBlob { return std::nullopt; } + /** + * Appends a string to the end of the blob. + * @param str + */ + auto append(std::string_view str) -> void { + auto const start_offset{m_data.size()}; + auto const end_offset{start_offset + str.length()}; + m_data.append(str); + m_offsets.emplace_back(end_offset); + } + private: std::string m_data; std::vector m_offsets{0}; diff --git a/components/core/src/clp/ffi/test/test_EncodedTextAst.cpp b/components/core/src/clp/ffi/test/test_EncodedTextAst.cpp new file mode 100644 index 0000000000..f8a6e0e28c --- /dev/null +++ b/components/core/src/clp/ffi/test/test_EncodedTextAst.cpp @@ -0,0 +1,181 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "../../ir/types.hpp" +#include "../../type_utils.hpp" +#include "../EncodedTextAst.hpp" +#include "../EncodedTextAstError.hpp" +#include "../encoding_methods.hpp" +#include "../StringBlob.hpp" + +namespace clp::ffi::test { +namespace { +using ir::eight_byte_encoded_variable_t; +using ir::EncodedVariableTypeReq; +using ir::four_byte_encoded_variable_t; +using ir::VariablePlaceholder; + +/** + * @tparam encoded_variable_t + * @param input + * @return An encoded text AST constructed by encoding the given input string. + */ +template +[[nodiscard]] auto create_encoded_text_ast_from_string(std::string_view input) + -> EncodedTextAst; + +template +auto create_encoded_text_ast_from_string(std::string_view input) + -> EncodedTextAst { + std::string logtype; + std::vector encoded_vars; + std::vector dict_var_bounds; + REQUIRE(encode_message(input, logtype, encoded_vars, dict_var_bounds)); + + StringBlob string_blob; + for (size_t i{0}; i < dict_var_bounds.size(); i += 2) { + auto const begin{static_cast(dict_var_bounds[i])}; + auto const length{static_cast(dict_var_bounds[i + 1]) - begin}; + auto const dict_var{input.substr(begin, length)}; + string_blob.append(dict_var); + } + + string_blob.append(logtype); + + auto encoded_text_ast_result{EncodedTextAst::create( + std::move(encoded_vars), + std::move(string_blob) + )}; + REQUIRE_FALSE(encoded_text_ast_result.has_error()); + return std::move(encoded_text_ast_result.value()); +} +} // namespace + +TEMPLATE_TEST_CASE( + "EncodedTextAst Decoding", + "[ffi][EncodedTextAst]", + eight_byte_encoded_variable_t, + four_byte_encoded_variable_t +) { + SECTION("Text with variables") { + std::vector> const test_str_components{ + {"Here is a string with a small int ", "2887"}, + {"and a medium int ", std::to_string(INT32_MAX)}, + {"and a very large int ", std::to_string(INT64_MAX)}, + {"and a small float ", "0.1"}, + {"and a medium float ", "-25.519686"}, + {"and a long float ", "-25.5196868642755"}, + {"and a weird float ", "-00.00"}, + {"and a string with numbers ", "bin/python3.14.0"}, + {"and another string with numbers ", "abc123"}, + {"and a dict var=", "IamString"}, + {"and another dict var=", "DictVarWith\\escape"}, + {"and an int var placeholder: ", + std::string(1, enum_to_underlying_type(VariablePlaceholder::Integer))}, + {"and a float var placeholder: ", + std::string(1, enum_to_underlying_type(VariablePlaceholder::Float))}, + {"and a dict var placeholder: ", + std::string(1, enum_to_underlying_type(VariablePlaceholder::Dictionary))}, + {"and a valid trailing escape: ", + std::string(2, enum_to_underlying_type(VariablePlaceholder::Escape))}, + }; + auto const text = fmt::format( + "{}", + fmt::join( + test_str_components | std::views::transform([](auto const& pair) { + return pair.first + pair.second; + }), + " " + ) + ); + + auto const encoded_text_ast{create_encoded_text_ast_from_string(text)}; + auto const decoded_text_result{encoded_text_ast.to_string()}; + REQUIRE_FALSE(decoded_text_result.has_error()); + REQUIRE((decoded_text_result.value() == text)); + } + + SECTION("Text without variables") { + constexpr std::string_view cText{"This is a static message."}; + auto const encoded_text_ast{create_encoded_text_ast_from_string(cText)}; + REQUIRE((encoded_text_ast.get_logtype() == cText)); + auto const decoded_text_result{encoded_text_ast.to_string()}; + REQUIRE_FALSE((decoded_text_result.has_error())); + REQUIRE((decoded_text_result.value() == cText)); + } + + SECTION("Decoding errors") { + SECTION("Missing logtype") { + auto const encoded_text_ast_result{ + EncodedTextAst::create(std::vector{}, StringBlob{}) + }; + REQUIRE(encoded_text_ast_result.has_error()); + REQUIRE( + (encoded_text_ast_result.error() + == EncodedTextAstError{EncodedTextAstErrorEnum::MissingLogtype}) + ); + } + + SECTION("Missing variables") { + auto const [placeholder, expected_error_enum] = GENERATE( + std::make_pair( + enum_to_underlying_type(VariablePlaceholder::Integer), + EncodedTextAstErrorEnum::MissingEncodedVar + ), + std::make_pair( + enum_to_underlying_type(VariablePlaceholder::Float), + EncodedTextAstErrorEnum::MissingEncodedVar + ), + std::make_pair( + enum_to_underlying_type(VariablePlaceholder::Dictionary), + EncodedTextAstErrorEnum::MissingDictVar + ) + ); + std::string const logtype_with_single_int_var{placeholder}; + StringBlob string_blob; + string_blob.append(logtype_with_single_int_var); + auto const encoded_text_ast_result{EncodedTextAst::create( + std::vector{}, + std::move(string_blob) + )}; + REQUIRE_FALSE(encoded_text_ast_result.has_error()); + auto const decoded_result{encoded_text_ast_result.value().to_string()}; + REQUIRE(decoded_result.has_error()); + REQUIRE((decoded_result.error() == EncodedTextAstError{expected_error_enum})); + } + + SECTION("Trailing escape") { + std::string const logtype_with_trailing_escape{ + "This is a string with a trailing escape " + + std::string(1, enum_to_underlying_type(VariablePlaceholder::Escape)) + }; + StringBlob string_blob; + string_blob.append(logtype_with_trailing_escape); + auto const encoded_text_ast_result{EncodedTextAst::create( + std::vector{}, + std::move(string_blob) + )}; + REQUIRE_FALSE(encoded_text_ast_result.has_error()); + auto const decoded_result{encoded_text_ast_result.value().to_string()}; + REQUIRE(decoded_result.has_error()); + REQUIRE( + (decoded_result.error() + == EncodedTextAstError{ + EncodedTextAstErrorEnum::UnexpectedTrailingEscapeCharacter + }) + ); + } + } +} +} // namespace clp::ffi::test diff --git a/components/core/src/clp/ffi/test/test_StringBlob.cpp b/components/core/src/clp/ffi/test/test_StringBlob.cpp index 2721420400..a853e4de70 100644 --- a/components/core/src/clp/ffi/test/test_StringBlob.cpp +++ b/components/core/src/clp/ffi/test/test_StringBlob.cpp @@ -9,8 +9,9 @@ #include "../../ErrorCode.hpp" #include "../StringBlob.hpp" +namespace clp::ffi::test { TEST_CASE("StringBlob basic functionality", "[StringBlob]") { - clp::ffi::StringBlob string_blob; + StringBlob string_blob; std::vector const test_strings{ "Hello, World!", @@ -22,7 +23,7 @@ TEST_CASE("StringBlob basic functionality", "[StringBlob]") { for (auto const& str : test_strings) { buffer += str; } - clp::BufferReader reader{buffer.data(), buffer.size()}; + BufferReader reader{buffer.data(), buffer.size()}; size_t expected_num_strings{0}; for (auto const& expected_str : test_strings) { @@ -43,3 +44,4 @@ TEST_CASE("StringBlob basic functionality", "[StringBlob]") { // NOLINTNEXTLINE(bugprone-unchecked-optional-access) REQUIRE((clp::ErrorCode::ErrorCode_EndOfFile == read_from_eof.value())); } +} // namespace clp::ffi::test diff --git a/components/core/src/clp/ir/types.hpp b/components/core/src/clp/ir/types.hpp index d8cb1cd372..2529ddc330 100644 --- a/components/core/src/clp/ir/types.hpp +++ b/components/core/src/clp/ir/types.hpp @@ -2,12 +2,18 @@ #define CLP_IR_TYPES_HPP #include +#include namespace clp::ir { using epoch_time_ms_t = int64_t; using eight_byte_encoded_variable_t = int64_t; using four_byte_encoded_variable_t = int32_t; +template +concept EncodedVariableTypeReq + = std::is_same_v + || std::is_same_v; + enum class VariablePlaceholder : char { Integer = 0x11, Dictionary = 0x12,