From 63a5e3f535c137094ab2015465f03aa70f3d2561 Mon Sep 17 00:00:00 2001 From: gibber9809 Date: Tue, 29 Jul 2025 14:43:24 +0000 Subject: [PATCH 1/9] Update EncodedVariableInterpreter to use templates in order to break dependence on specific clp implementations; Use string_view where possible instead of std::string const& --- .../src/clp/EncodedVariableInterpreter.cpp | 310 +------------ .../src/clp/EncodedVariableInterpreter.hpp | 420 ++++++++++++++++-- .../tests/test-EncodedVariableInterpreter.cpp | 3 + 3 files changed, 399 insertions(+), 334 deletions(-) diff --git a/components/core/src/clp/EncodedVariableInterpreter.cpp b/components/core/src/clp/EncodedVariableInterpreter.cpp index 6d9bb14f12..61a976f8d7 100644 --- a/components/core/src/clp/EncodedVariableInterpreter.cpp +++ b/components/core/src/clp/EncodedVariableInterpreter.cpp @@ -2,25 +2,17 @@ #include #include -#include +#include +#include #include #include "Defs.h" -#include "ffi/ir_stream/decoding_methods.hpp" -#include "ir/LogEvent.hpp" -#include "ir/types.hpp" -#include "spdlog_with_specializations.hpp" #include "type_utils.hpp" using clp::ffi::cEightByteEncodedFloatDigitsBitMask; -using clp::ir::eight_byte_encoded_variable_t; -using clp::ir::four_byte_encoded_variable_t; -using clp::ir::LogEvent; -using clp::ir::VariablePlaceholder; using std::string; -using std::unordered_set; -using std::vector; +using std::string_view; namespace clp { variable_dictionary_id_t EncodedVariableInterpreter::decode_var_dict_id( @@ -30,7 +22,7 @@ variable_dictionary_id_t EncodedVariableInterpreter::decode_var_dict_id( } bool EncodedVariableInterpreter::convert_string_to_representable_integer_var( - string const& value, + string_view value, encoded_variable_t& encoded_var ) { size_t length = value.length(); @@ -69,7 +61,7 @@ bool EncodedVariableInterpreter::convert_string_to_representable_integer_var( } bool EncodedVariableInterpreter::convert_string_to_representable_float_var( - string const& value, + string_view value, encoded_variable_t& encoded_var ) { if (value.empty()) { @@ -204,299 +196,7 @@ void EncodedVariableInterpreter::convert_encoded_float_to_string( value[value_length - 1 - decimal_pos] = '.'; } -void EncodedVariableInterpreter::encode_and_add_to_dictionary( - string const& message, - LogTypeDictionaryEntry& logtype_dict_entry, - VariableDictionaryWriter& var_dict, - vector& encoded_vars, - vector& var_ids -) { - // Extract all variables and add to dictionary while building logtype - size_t var_begin_pos = 0; - size_t var_end_pos = 0; - string var_str; - logtype_dict_entry.clear(); - // To avoid reallocating the logtype as we append to it, reserve enough space to hold the entire - // message - logtype_dict_entry.reserve_constant_length(message.length()); - while (logtype_dict_entry.parse_next_var(message, var_begin_pos, var_end_pos, var_str)) { - auto encoded_var = encode_var(var_str, logtype_dict_entry, var_dict, var_ids); - encoded_vars.push_back(encoded_var); - } -} - -template -void EncodedVariableInterpreter::encode_and_add_to_dictionary( - LogEvent const& log_event, - LogTypeDictionaryEntry& logtype_dict_entry, - VariableDictionaryWriter& var_dict, - std::vector& encoded_vars, - std::vector& var_ids, - size_t& raw_num_bytes -) { - logtype_dict_entry.clear(); - auto const& log_message = log_event.get_message(); - logtype_dict_entry.reserve_constant_length(log_message.get_logtype().length()); - - raw_num_bytes = 0; - - auto constant_handler = [&](std::string const& value, size_t begin_pos, size_t length) { - raw_num_bytes += length; - logtype_dict_entry.add_constant(value, begin_pos, length); - }; - - auto encoded_int_handler = [&](encoded_variable_t encoded_var) { - raw_num_bytes += ffi::decode_integer_var(encoded_var).length(); - logtype_dict_entry.add_int_var(); - - eight_byte_encoded_variable_t eight_byte_encoded_var{}; - if constexpr (std::is_same_v) { - eight_byte_encoded_var = encoded_var; - } else { // std::is_same_v - eight_byte_encoded_var = ffi::encode_four_byte_integer_as_eight_byte(encoded_var); - } - encoded_vars.push_back(eight_byte_encoded_var); - }; - - auto encoded_float_handler = [&](four_byte_encoded_variable_t encoded_var) { - raw_num_bytes += ffi::decode_float_var(encoded_var).length(); - logtype_dict_entry.add_float_var(); - - eight_byte_encoded_variable_t eight_byte_encoded_var{}; - if constexpr (std::is_same_v) { - eight_byte_encoded_var = encoded_var; - } else { // std::is_same_v - eight_byte_encoded_var = ffi::encode_four_byte_float_as_eight_byte(encoded_var); - } - encoded_vars.push_back(eight_byte_encoded_var); - }; - - auto dict_var_handler = [&](string const& dict_var) { - raw_num_bytes += dict_var.length(); - - eight_byte_encoded_variable_t encoded_var{}; - if constexpr (std::is_same_v) { - encoded_var = encode_var_dict_id( - add_dict_var(dict_var, logtype_dict_entry, var_dict, var_ids) - ); - } else { // std::is_same_v - encoded_var = encode_var(dict_var, logtype_dict_entry, var_dict, var_ids); - } - encoded_vars.push_back(encoded_var); - }; - - ffi::ir_stream::generic_decode_message( - log_message.get_logtype(), - log_message.get_encoded_vars(), - log_message.get_dict_vars(), - constant_handler, - encoded_int_handler, - encoded_float_handler, - dict_var_handler - ); -} - -bool EncodedVariableInterpreter::decode_variables_into_message( - LogTypeDictionaryEntry const& logtype_dict_entry, - VariableDictionaryReader const& var_dict, - vector const& encoded_vars, - string& decompressed_msg -) { - // Ensure the number of variables in the logtype matches the number of encoded variables given - auto const& logtype_value = logtype_dict_entry.get_value(); - size_t const num_vars = logtype_dict_entry.get_num_variables(); - if (num_vars != encoded_vars.size()) { - SPDLOG_ERROR( - "EncodedVariableInterpreter: Logtype '{}' contains {} variables, but {} were given " - "for decoding.", - logtype_value.c_str(), - num_vars, - encoded_vars.size() - ); - return false; - } - - VariablePlaceholder var_placeholder; - size_t constant_begin_pos = 0; - string float_str; - variable_dictionary_id_t var_dict_id; - size_t const num_placeholders_in_logtype = logtype_dict_entry.get_num_placeholders(); - for (size_t placeholder_ix = 0, var_ix = 0; placeholder_ix < num_placeholders_in_logtype; - ++placeholder_ix) - { - size_t placeholder_position - = logtype_dict_entry.get_placeholder_info(placeholder_ix, var_placeholder); - - // Add the constant that's between the last placeholder and this one - decompressed_msg.append( - logtype_value, - constant_begin_pos, - placeholder_position - constant_begin_pos - ); - switch (var_placeholder) { - case VariablePlaceholder::Integer: - decompressed_msg += std::to_string(encoded_vars[var_ix++]); - break; - case VariablePlaceholder::Float: - convert_encoded_float_to_string(encoded_vars[var_ix++], float_str); - decompressed_msg += float_str; - break; - case VariablePlaceholder::Dictionary: - var_dict_id = decode_var_dict_id(encoded_vars[var_ix++]); - decompressed_msg += var_dict.get_value(var_dict_id); - break; - case VariablePlaceholder::Escape: - break; - default: - SPDLOG_ERROR( - "EncodedVariableInterpreter: Logtype '{}' contains unexpected variable " - "placeholder 0x{:x}", - logtype_value, - enum_to_underlying_type(var_placeholder) - ); - return false; - } - // Move past the variable placeholder - constant_begin_pos = placeholder_position + 1; - } - // Append remainder of logtype, if any - if (constant_begin_pos < logtype_value.length()) { - decompressed_msg.append(logtype_value, constant_begin_pos, string::npos); - } - - return true; -} - -bool EncodedVariableInterpreter::encode_and_search_dictionary( - string const& var_str, - VariableDictionaryReader const& var_dict, - bool ignore_case, - string& logtype, - SubQuery& sub_query -) { - size_t length = var_str.length(); - if (0 == length) { - throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); - } - - encoded_variable_t encoded_var; - if (convert_string_to_representable_integer_var(var_str, encoded_var)) { - LogTypeDictionaryEntry::add_int_var(logtype); - sub_query.add_non_dict_var(encoded_var); - } else if (convert_string_to_representable_float_var(var_str, encoded_var)) { - LogTypeDictionaryEntry::add_float_var(logtype); - sub_query.add_non_dict_var(encoded_var); - } else { - auto const entries = var_dict.get_entry_matching_value(var_str, ignore_case); - if (entries.empty()) { - // Not in dictionary - return false; - } - - LogTypeDictionaryEntry::add_dict_var(logtype); - - if (entries.size() == 1) { - auto const* entry = entries.at(0); - sub_query.add_dict_var(encode_var_dict_id(entry->get_id()), entry); - return true; - } - - std::unordered_set const entries_set{ - entries.cbegin(), - entries.cend() - }; - std::unordered_set encoded_vars; - encoded_vars.reserve(entries.size()); - for (auto const* entry : entries) { - encoded_vars.emplace(encode_var_dict_id(entry->get_id())); - } - sub_query.add_imprecise_dict_var(encoded_vars, entries_set); - } - - return true; -} - -bool EncodedVariableInterpreter::wildcard_search_dictionary_and_get_encoded_matches( - std::string const& var_wildcard_str, - VariableDictionaryReader const& var_dict, - bool ignore_case, - SubQuery& sub_query -) { - // Find matches - unordered_set var_dict_entries; - var_dict.get_entries_matching_wildcard_string(var_wildcard_str, ignore_case, var_dict_entries); - if (var_dict_entries.empty()) { - // Not in dictionary - return false; - } - - // Encode matches - unordered_set encoded_vars; - for (auto entry : var_dict_entries) { - encoded_vars.insert(encode_var_dict_id(entry->get_id())); - } - - sub_query.add_imprecise_dict_var(encoded_vars, var_dict_entries); - - return true; -} - encoded_variable_t EncodedVariableInterpreter::encode_var_dict_id(variable_dictionary_id_t id) { return bit_cast(id); } - -encoded_variable_t EncodedVariableInterpreter::encode_var( - string const& var, - LogTypeDictionaryEntry& logtype_dict_entry, - VariableDictionaryWriter& var_dict, - vector& var_ids -) { - encoded_variable_t encoded_var{0}; - if (convert_string_to_representable_integer_var(var, encoded_var)) { - logtype_dict_entry.add_int_var(); - } else if (convert_string_to_representable_float_var(var, encoded_var)) { - logtype_dict_entry.add_float_var(); - } else { - // Variable string looks like a dictionary variable, so encode it as so - encoded_var = encode_var_dict_id(add_dict_var(var, logtype_dict_entry, var_dict, var_ids)); - } - return encoded_var; -} - -variable_dictionary_id_t EncodedVariableInterpreter::add_dict_var( - string const& var, - LogTypeDictionaryEntry& logtype_dict_entry, - VariableDictionaryWriter& var_dict, - vector& var_ids -) { - variable_dictionary_id_t id{cVariableDictionaryIdMax}; - var_dict.add_entry(var, id); - var_ids.push_back(id); - - logtype_dict_entry.add_dictionary_var(); - - return id; -} - -// Explicitly declare template specializations so that we can define the template methods in this -// file -template void -EncodedVariableInterpreter::encode_and_add_to_dictionary( - LogEvent const& log_event, - LogTypeDictionaryEntry& logtype_dict_entry, - VariableDictionaryWriter& var_dict, - std::vector& encoded_vars, - std::vector& var_ids, - size_t& raw_num_bytes -); - -template void -EncodedVariableInterpreter::encode_and_add_to_dictionary( - LogEvent const& log_event, - LogTypeDictionaryEntry& logtype_dict_entry, - VariableDictionaryWriter& var_dict, - std::vector& encoded_vars, - std::vector& var_ids, - size_t& raw_num_bytes -); } // namespace clp diff --git a/components/core/src/clp/EncodedVariableInterpreter.hpp b/components/core/src/clp/EncodedVariableInterpreter.hpp index 9bb216a29a..ce611481a3 100644 --- a/components/core/src/clp/EncodedVariableInterpreter.hpp +++ b/components/core/src/clp/EncodedVariableInterpreter.hpp @@ -2,14 +2,18 @@ #define CLP_ENCODEDVARIABLEINTERPRETER_HPP #include +#include +#include #include +#include "DictionaryConcepts.hpp" +#include "ffi/ir_stream/decoding_methods.hpp" #include "ir/LogEvent.hpp" #include "ir/types.hpp" #include "Query.hpp" +#include "spdlog_with_specializations.hpp" #include "TraceableException.hpp" -#include "VariableDictionaryReader.hpp" -#include "VariableDictionaryWriter.hpp" +#include "type_utils.hpp" namespace clp { /** @@ -47,6 +51,39 @@ class EncodedVariableInterpreter { // Methods static encoded_variable_t encode_var_dict_id(variable_dictionary_id_t id); static variable_dictionary_id_t decode_var_dict_id(encoded_variable_t encoded_var); + + /** + * Adds a dictionary variable placeholder to the given logtype + * @param logtype + */ + static void add_dict_var(std::string& logtype) { + logtype += enum_to_underlying_type(ir::VariablePlaceholder::Dictionary); + } + + /** + * Adds an integer variable placeholder to the given logtype + * @param logtype + */ + static void add_int_var(std::string& logtype) { + logtype += enum_to_underlying_type(ir::VariablePlaceholder::Integer); + } + + /** + * Adds a float variable placeholder to the given logtype + * @param logtype + */ + static void add_float_var(std::string& logtype) { + logtype += enum_to_underlying_type(ir::VariablePlaceholder::Float); + } + + /** + * Adds an escape character to the given logtype + * @param logtype + */ + static void add_escape(std::string& logtype) { + logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + } + /** * Converts the given string into a representable integer variable if possible * @param value @@ -54,7 +91,7 @@ class EncodedVariableInterpreter { * @return true if was successfully converted, false otherwise */ static bool convert_string_to_representable_integer_var( - std::string const& value, + std::string_view value, encoded_variable_t& encoded_var ); /** @@ -64,7 +101,7 @@ class EncodedVariableInterpreter { * @return true if was successfully converted, false otherwise */ static bool convert_string_to_representable_float_var( - std::string const& value, + std::string_view value, encoded_variable_t& encoded_var ); /** @@ -77,16 +114,21 @@ class EncodedVariableInterpreter { /** * Parses all variables from a message (while constructing the logtype) and encodes them (adding * them to the variable dictionary if necessary) + * @tparam VariableDictionaryWriterType + * @tparam LogTypeDictionaryEntryType * @param message * @param logtype_dict_entry * @param var_dict * @param encoded_vars * @param var_ids */ + template < + VariableDictionaryWriterReq VariableDictionaryWriterType, + LogTypeDictionaryEntryReq LogTypeDictionaryEntryType> static void encode_and_add_to_dictionary( - std::string const& message, - LogTypeDictionaryEntry& logtype_dict_entry, - VariableDictionaryWriter& var_dict, + std::string_view message, + LogTypeDictionaryEntryType& logtype_dict_entry, + VariableDictionaryWriterType& var_dict, std::vector& encoded_vars, std::vector& var_ids ); @@ -95,7 +137,9 @@ class EncodedVariableInterpreter { * Encodes the given IR log event, constructing a logtype dictionary entry, and adding any * dictionary variables to the dictionary. NOTE: Four-byte encoded variables will be converted * to eight-byte encoded variables. - * @tparam encoded_variable_t The type of the encoded variables in the log event + * @tparam EncodedVariableType The type of the encoded variables in the log event. + * @tparam LogTypeDictionaryEntryType + * @tparam VariableDictionaryWriterType * @param log_event * @param logtype_dict_entry * @param var_dict @@ -104,11 +148,14 @@ class EncodedVariableInterpreter { * @param raw_num_bytes Returns an estimate of the number of bytes that this log event would * occupy if it was not encoded in CLP's IR */ - template + template < + typename EncodedVariableType, + LogTypeDictionaryEntryReq LogTypeDictionaryEntryType, + VariableDictionaryWriterReq VariableDictionaryWriterType> static void encode_and_add_to_dictionary( - ir::LogEvent const& log_event, - LogTypeDictionaryEntry& logtype_dict_entry, - VariableDictionaryWriter& var_dict, + ir::LogEvent const& log_event, + LogTypeDictionaryEntryType& logtype_dict_entry, + VariableDictionaryWriterType& var_dict, std::vector& encoded_vars, std::vector& var_ids, size_t& raw_num_bytes @@ -116,22 +163,30 @@ class EncodedVariableInterpreter { /** * Decodes all variables and decompresses them into a message + * @tparam LogTypeDictionaryEntryType + * @tparam VariableDictionaryReaderType + * @tparam EncodedVariableVectorType A vector of `clp::encoded_variable_t`. * @param logtype_dict_entry * @param var_dict * @param encoded_vars * @param decompressed_msg * @return true if successful, false otherwise */ + template < + LogTypeDictionaryEntryReq LogTypeDictionaryEntryType, + VariableDictionaryReaderReq VariableDictionaryReaderType, + typename EncodedVariableVectorType> // TODO: Make a concept. static bool decode_variables_into_message( - LogTypeDictionaryEntry const& logtype_dict_entry, - VariableDictionaryReader const& var_dict, - std::vector const& encoded_vars, + LogTypeDictionaryEntryType const& logtype_dict_entry, + VariableDictionaryReaderType const& var_dict, + EncodedVariableVectorType const& encoded_vars, std::string& decompressed_msg ); /** * Encodes a string-form variable, and if it is dictionary variable, searches for its ID in the - * given variable dictionary + * given variable dictionary. + * @tparam VariableDictionaryReaderType * @param var_str * @param var_dict * @param ignore_case @@ -141,25 +196,32 @@ class EncodedVariableInterpreter { * dictionary * @return false otherwise */ + template static bool encode_and_search_dictionary( - std::string const& var_str, - VariableDictionaryReader const& var_dict, + std::string_view var_str, + VariableDictionaryReaderType const& var_dict, bool ignore_case, std::string& logtype, SubQuery& sub_query ); /** * Search for the given string-form variable in the variable dictionary, encode any matches, and - * add them to the given sub-query + * add them to the given sub-query. + * @tparam VariableDictionaryReaderType + * @tparam VariableDictionaryEntryType * @param var_wildcard_str * @param var_dict * @param ignore_case * @param sub_query * @return true if any match found, false otherwise */ + template < + VariableDictionaryReaderReq VariableDictionaryReaderType, + VariableDictionaryEntryReq VariableDictionaryEntryType + = VariableDictionaryReaderType::entry_t> static bool wildcard_search_dictionary_and_get_encoded_matches( - std::string const& var_wildcard_str, - VariableDictionaryReader const& var_dict, + std::string_view var_wildcard_str, + VariableDictionaryReaderType const& var_dict, bool ignore_case, SubQuery& sub_query ); @@ -167,7 +229,9 @@ class EncodedVariableInterpreter { private: /** * Encodes the given string as a dictionary or non-dictionary variable and adds a corresponding - * placeholder to the logtype + * placeholder to the logtype. + * @tparam LogTypeDictionaryEntryType + * @tparam VariableDictionaryWriterType * @param var * @param logtype_dict_entry * @param var_dict @@ -175,29 +239,327 @@ class EncodedVariableInterpreter { * variable) * @return The encoded variable */ + template < + LogTypeDictionaryEntryReq LogTypeDictionaryEntryType, + VariableDictionaryWriterReq VariableDictionaryWriterType> static encoded_variable_t encode_var( - std::string const& var, - LogTypeDictionaryEntry& logtype_dict_entry, - VariableDictionaryWriter& var_dict, + std::string_view var, + LogTypeDictionaryEntryType& logtype_dict_entry, + VariableDictionaryWriterType& var_dict, std::vector& var_ids ); /** * Adds the given string to the variable dictionary and adds a corresponding placeholder to - * logtype + * logtype. + * @tparam LogTypeDictionaryEntryType + * @tparam VariableDictionaryWriterType * @param var * @param logtype_dict_entry * @param var_dict * @param var_ids A container to add the dictionary ID to * @return The dictionary ID */ + template < + LogTypeDictionaryEntryReq LogTypeDictionaryEntryType, + VariableDictionaryWriterReq VariableDictionaryWriterType> static variable_dictionary_id_t add_dict_var( - std::string const& var, - LogTypeDictionaryEntry& logtype_dict_entry, - VariableDictionaryWriter& var_dict, + std::string_view var, + LogTypeDictionaryEntryType& logtype_dict_entry, + VariableDictionaryWriterType& var_dict, std::vector& var_ids ); }; + +template < + VariableDictionaryWriterReq VariableDictionaryWriterType, + LogTypeDictionaryEntryReq LogTypeDictionaryEntryType> +void EncodedVariableInterpreter::encode_and_add_to_dictionary( + std::string_view message, + LogTypeDictionaryEntryType& logtype_dict_entry, + VariableDictionaryWriterType& var_dict, + std::vector& encoded_vars, + std::vector& var_ids +) { + // Extract all variables and add to dictionary while building logtype + size_t var_begin_pos = 0; + size_t var_end_pos = 0; + std::string_view var_str; + logtype_dict_entry.clear(); + // To avoid reallocating the logtype as we append to it, reserve enough space to hold the entire + // message + logtype_dict_entry.reserve_constant_length(message.length()); + while (logtype_dict_entry.parse_next_var(message, var_begin_pos, var_end_pos, var_str)) { + auto encoded_var = encode_var(var_str, logtype_dict_entry, var_dict, var_ids); + encoded_vars.push_back(encoded_var); + } +} + +template < + typename EncodedVariableType, + LogTypeDictionaryEntryReq LogTypeDictionaryEntryType, + VariableDictionaryWriterReq VariableDictionaryWriterType> +void EncodedVariableInterpreter::encode_and_add_to_dictionary( + ir::LogEvent const& log_event, + LogTypeDictionaryEntryType& logtype_dict_entry, + VariableDictionaryWriterType& var_dict, + std::vector& encoded_vars, + std::vector& var_ids, + size_t& raw_num_bytes +) { + logtype_dict_entry.clear(); + auto const& log_message = log_event.get_message(); + logtype_dict_entry.reserve_constant_length(log_message.get_logtype().length()); + + raw_num_bytes = 0; + + auto constant_handler = [&](std::string const& value, size_t begin_pos, size_t length) { + raw_num_bytes += length; + logtype_dict_entry.add_constant(value, begin_pos, length); + }; + + auto encoded_int_handler = [&](EncodedVariableType encoded_var) { + raw_num_bytes += ffi::decode_integer_var(encoded_var).length(); + logtype_dict_entry.add_int_var(); + + ir::eight_byte_encoded_variable_t eight_byte_encoded_var{}; + if constexpr (std::is_same_v) { + eight_byte_encoded_var = encoded_var; + } else { // std::is_same_v + eight_byte_encoded_var = ffi::encode_four_byte_integer_as_eight_byte(encoded_var); + } + encoded_vars.push_back(eight_byte_encoded_var); + }; + + auto encoded_float_handler = [&](EncodedVariableType encoded_var) { + raw_num_bytes += ffi::decode_float_var(encoded_var).length(); + logtype_dict_entry.add_float_var(); + + ir::eight_byte_encoded_variable_t eight_byte_encoded_var{}; + if constexpr (std::is_same_v) { + eight_byte_encoded_var = encoded_var; + } else { // std::is_same_v + eight_byte_encoded_var = ffi::encode_four_byte_float_as_eight_byte(encoded_var); + } + encoded_vars.push_back(eight_byte_encoded_var); + }; + + auto dict_var_handler = [&](std::string const& dict_var) { + raw_num_bytes += dict_var.length(); + + ir::eight_byte_encoded_variable_t encoded_var{}; + if constexpr (std::is_same_v) { + encoded_var = encode_var_dict_id( + add_dict_var(dict_var, logtype_dict_entry, var_dict, var_ids) + ); + } else { // std::is_same_v + encoded_var = encode_var(dict_var, logtype_dict_entry, var_dict, var_ids); + } + encoded_vars.push_back(encoded_var); + }; + + ffi::ir_stream::generic_decode_message( + log_message.get_logtype(), + log_message.get_encoded_vars(), + log_message.get_dict_vars(), + constant_handler, + encoded_int_handler, + encoded_float_handler, + dict_var_handler + ); +} + +template < + LogTypeDictionaryEntryReq LogTypeDictionaryEntryType, + VariableDictionaryReaderReq VariableDictionaryReaderType, + typename EncodedVariableVectorType> +bool EncodedVariableInterpreter::decode_variables_into_message( + LogTypeDictionaryEntryType const& logtype_dict_entry, + VariableDictionaryReaderType const& var_dict, + EncodedVariableVectorType const& encoded_vars, + std::string& decompressed_msg +) { + // Ensure the number of variables in the logtype matches the number of encoded variables given + auto const& logtype_value = logtype_dict_entry.get_value(); + size_t const num_vars = logtype_dict_entry.get_num_variables(); + if (num_vars != encoded_vars.size()) { + SPDLOG_ERROR( + "EncodedVariableInterpreter: Logtype '{}' contains {} variables, but {} were given " + "for decoding.", + logtype_value.c_str(), + num_vars, + encoded_vars.size() + ); + return false; + } + + ir::VariablePlaceholder var_placeholder; + size_t constant_begin_pos = 0; + std::string float_str; + variable_dictionary_id_t var_dict_id; + size_t const num_placeholders_in_logtype = logtype_dict_entry.get_num_placeholders(); + for (size_t placeholder_ix = 0, var_ix = 0; placeholder_ix < num_placeholders_in_logtype; + ++placeholder_ix) + { + size_t placeholder_position + = logtype_dict_entry.get_placeholder_info(placeholder_ix, var_placeholder); + + // Add the constant that's between the last placeholder and this one + decompressed_msg.append( + logtype_value, + constant_begin_pos, + placeholder_position - constant_begin_pos + ); + switch (var_placeholder) { + case ir::VariablePlaceholder::Integer: + decompressed_msg += std::to_string(encoded_vars[var_ix++]); + break; + case ir::VariablePlaceholder::Float: + convert_encoded_float_to_string(encoded_vars[var_ix++], float_str); + decompressed_msg += float_str; + break; + case ir::VariablePlaceholder::Dictionary: + var_dict_id = decode_var_dict_id(encoded_vars[var_ix++]); + decompressed_msg += var_dict.get_value(var_dict_id); + break; + case ir::VariablePlaceholder::Escape: + break; + default: + SPDLOG_ERROR( + "EncodedVariableInterpreter: Logtype '{}' contains unexpected variable " + "placeholder 0x{:x}", + logtype_value, + enum_to_underlying_type(var_placeholder) + ); + return false; + } + // Move past the variable placeholder + constant_begin_pos = placeholder_position + 1; + } + // Append remainder of logtype, if any + if (constant_begin_pos < logtype_value.length()) { + decompressed_msg.append(logtype_value, constant_begin_pos, std::string::npos); + } + + return true; +} + +template +bool EncodedVariableInterpreter::encode_and_search_dictionary( + std::string_view var_str, + VariableDictionaryReaderType const& var_dict, + bool ignore_case, + std::string& logtype, + SubQuery& sub_query +) { + size_t length = var_str.length(); + if (0 == length) { + throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); + } + + encoded_variable_t encoded_var; + if (convert_string_to_representable_integer_var(var_str, encoded_var)) { + add_int_var(logtype); + sub_query.add_non_dict_var(encoded_var); + } else if (convert_string_to_representable_float_var(var_str, encoded_var)) { + add_float_var(logtype); + sub_query.add_non_dict_var(encoded_var); + } else { + auto const entries = var_dict.get_entry_matching_value(var_str, ignore_case); + if (entries.empty()) { + // Not in dictionary + return false; + } + + add_dict_var(logtype); + + if (entries.size() == 1) { + auto const* entry = entries.at(0); + sub_query.add_dict_var(encode_var_dict_id(entry->get_id()), entry->get_id()); + return true; + } + + std::unordered_set encoded_vars; + std::unordered_set var_dict_ids; + encoded_vars.reserve(entries.size()); + for (auto const* entry : entries) { + encoded_vars.emplace(encode_var_dict_id(entry->get_id())); + var_dict_ids.emplace(entry->get_id()); + } + sub_query.add_imprecise_dict_var(encoded_vars, var_dict_ids); + } + + return true; +} + +template < + VariableDictionaryReaderReq VariableDictionaryReaderType, + VariableDictionaryEntryReq VariableDictionaryEntryType> +bool EncodedVariableInterpreter::wildcard_search_dictionary_and_get_encoded_matches( + std::string_view var_wildcard_str, + VariableDictionaryReaderType const& var_dict, + bool ignore_case, + SubQuery& sub_query +) { + // Find matches + std::unordered_set var_dict_entries; + var_dict.get_entries_matching_wildcard_string(var_wildcard_str, ignore_case, var_dict_entries); + if (var_dict_entries.empty()) { + // Not in dictionary + return false; + } + + // Encode matches + std::unordered_set encoded_vars; + std::unordered_set var_dict_ids; + for (auto entry : var_dict_entries) { + encoded_vars.emplace(encode_var_dict_id(entry->get_id())); + var_dict_ids.emplace(entry->get_id()); + } + + sub_query.add_imprecise_dict_var(encoded_vars, var_dict_ids); + + return true; +} + +template < + LogTypeDictionaryEntryReq LogTypeDictionaryEntryType, + VariableDictionaryWriterReq VariableDictionaryWriterType> +encoded_variable_t EncodedVariableInterpreter::encode_var( + std::string_view var, + LogTypeDictionaryEntryType& logtype_dict_entry, + VariableDictionaryWriterType& var_dict, + std::vector& var_ids +) { + encoded_variable_t encoded_var{0}; + if (convert_string_to_representable_integer_var(var, encoded_var)) { + logtype_dict_entry.add_int_var(); + } else if (convert_string_to_representable_float_var(var, encoded_var)) { + logtype_dict_entry.add_float_var(); + } else { + // Variable string looks like a dictionary variable, so encode it as so + encoded_var = encode_var_dict_id(add_dict_var(var, logtype_dict_entry, var_dict, var_ids)); + } + return encoded_var; +} + +template < + LogTypeDictionaryEntryReq LogTypeDictionaryEntryType, + VariableDictionaryWriterReq VariableDictionaryWriterType> +variable_dictionary_id_t EncodedVariableInterpreter::add_dict_var( + std::string_view var, + LogTypeDictionaryEntryType& logtype_dict_entry, + VariableDictionaryWriterType& var_dict, + std::vector& var_ids +) { + variable_dictionary_id_t id{cVariableDictionaryIdMax}; + var_dict.add_entry(var, id); + var_ids.push_back(id); + + logtype_dict_entry.add_dictionary_var(); + + return id; +} } // namespace clp #endif // CLP_ENCODEDVARIABLEINTERPRETER_HPP diff --git a/components/core/tests/test-EncodedVariableInterpreter.cpp b/components/core/tests/test-EncodedVariableInterpreter.cpp index 4dcc6d399e..9b0f952358 100644 --- a/components/core/tests/test-EncodedVariableInterpreter.cpp +++ b/components/core/tests/test-EncodedVariableInterpreter.cpp @@ -4,7 +4,10 @@ #include "../src/clp/EncodedVariableInterpreter.hpp" #include "../src/clp/ir/types.hpp" +#include "../src/clp/LogTypeDictionaryEntry.hpp" #include "../src/clp/streaming_archive/Constants.hpp" +#include "../src/clp/VariableDictionaryReader.hpp" +#include "../src/clp/VariableDictionaryWriter.hpp" using clp::cVariableDictionaryIdMax; using clp::encoded_variable_t; From 316456931ae565b93b752d000c77c0ec2fabe490 Mon Sep 17 00:00:00 2001 From: gibber9809 Date: Tue, 29 Jul 2025 14:52:51 +0000 Subject: [PATCH 2/9] Update dictionaries to use std::string_view where possible and follow future concept interface --- components/core/src/clp/DictionaryReader.hpp | 20 ++++++--- components/core/src/clp/DictionaryWriter.hpp | 8 +++- .../core/src/clp/LogTypeDictionaryEntry.cpp | 27 +++++------- .../core/src/clp/LogTypeDictionaryEntry.hpp | 42 +++---------------- .../core/src/clp/VariableDictionaryWriter.cpp | 8 +++- .../core/src/clp/VariableDictionaryWriter.hpp | 4 +- 6 files changed, 46 insertions(+), 63 deletions(-) diff --git a/components/core/src/clp/DictionaryReader.hpp b/components/core/src/clp/DictionaryReader.hpp index 9d49e21228..7e8f3a3a16 100644 --- a/components/core/src/clp/DictionaryReader.hpp +++ b/components/core/src/clp/DictionaryReader.hpp @@ -1,7 +1,10 @@ #ifndef CLP_DICTIONARYREADER_HPP #define CLP_DICTIONARYREADER_HPP +#include #include +#include +#include #include #include @@ -34,6 +37,9 @@ class DictionaryReader { char const* what() const noexcept override { return "DictionaryReader operation failed"; } }; + using dictionary_id_t = DictionaryIdType; + using entry_t = EntryType; + // Constructors DictionaryReader() : m_is_open(false), m_num_segments_read_from_index(0) { static_assert( @@ -85,7 +91,7 @@ class DictionaryReader { * @return a vector of matching entries, or an empty vector if no entry matches. */ std::vector - get_entry_matching_value(std::string const& search_string, bool ignore_case) const; + get_entry_matching_value(std::string_view search_string, bool ignore_case) const; /** * Gets the entries that match a given wildcard string * @param wildcard_string @@ -93,7 +99,7 @@ class DictionaryReader { * @param entries Set in which to store found entries */ void get_entries_matching_wildcard_string( - std::string const& wildcard_string, + std::string_view wildcard_string, bool ignore_case, std::unordered_set& entries ) const; @@ -235,7 +241,7 @@ DictionaryReader::get_value(DictionaryIdType id) co template std::vector DictionaryReader::get_entry_matching_value( - std::string const& search_string, + std::string_view search_string, bool ignore_case ) const { if (false == ignore_case) { @@ -252,7 +258,11 @@ DictionaryReader::get_entry_matching_value( } std::vector entries; - auto const search_string_uppercase = boost::algorithm::to_upper_copy(search_string); + std::string search_string_uppercase; + std::ignore = boost::algorithm::to_upper_copy( + std::back_inserter(search_string_uppercase), + search_string + ); for (auto const& entry : m_entries) { if (boost::algorithm::to_upper_copy(entry.get_value()) == search_string_uppercase) { entries.push_back(&entry); @@ -263,7 +273,7 @@ DictionaryReader::get_entry_matching_value( template void DictionaryReader::get_entries_matching_wildcard_string( - std::string const& wildcard_string, + std::string_view wildcard_string, bool ignore_case, std::unordered_set& entries ) const { diff --git a/components/core/src/clp/DictionaryWriter.hpp b/components/core/src/clp/DictionaryWriter.hpp index 7cac9d5aa5..280a2ae3dd 100644 --- a/components/core/src/clp/DictionaryWriter.hpp +++ b/components/core/src/clp/DictionaryWriter.hpp @@ -2,7 +2,8 @@ #define CLP_DICTIONARYWRITER_HPP #include -#include + +#include #include "ArrayBackedPosIntSet.hpp" #include "Defs.h" @@ -34,6 +35,9 @@ class DictionaryWriter { char const* what() const noexcept override { return "DictionaryWriter operation failed"; } }; + using dictionary_id_t = DictionaryIdType; + using entry_t = EntryType; + // Constructors DictionaryWriter() : m_is_open(false) {} @@ -83,7 +87,7 @@ class DictionaryWriter { protected: // Types - using value_to_id_t = std::unordered_map; + using value_to_id_t = absl::flat_hash_map; // Variables bool m_is_open; diff --git a/components/core/src/clp/LogTypeDictionaryEntry.cpp b/components/core/src/clp/LogTypeDictionaryEntry.cpp index 62a9db7bf5..1b8784cd77 100644 --- a/components/core/src/clp/LogTypeDictionaryEntry.cpp +++ b/components/core/src/clp/LogTypeDictionaryEntry.cpp @@ -1,5 +1,6 @@ #include "LogTypeDictionaryEntry.hpp" +#include "EncodedVariableInterpreter.hpp" #include "ir/parsing.hpp" #include "ir/types.hpp" #include "type_utils.hpp" @@ -33,7 +34,7 @@ size_t LogTypeDictionaryEntry::get_data_size() const { } void LogTypeDictionaryEntry::add_constant( - string const& value_containing_constant, + std::string_view value_containing_constant, size_t begin_pos, size_t length ) { @@ -42,30 +43,30 @@ void LogTypeDictionaryEntry::add_constant( void LogTypeDictionaryEntry::add_dictionary_var() { m_placeholder_positions.push_back(m_value.length()); - add_dict_var(m_value); + EncodedVariableInterpreter::add_dict_var(m_value); } void LogTypeDictionaryEntry::add_int_var() { m_placeholder_positions.push_back(m_value.length()); - add_int_var(m_value); + EncodedVariableInterpreter::add_int_var(m_value); } void LogTypeDictionaryEntry::add_float_var() { m_placeholder_positions.push_back(m_value.length()); - add_float_var(m_value); + EncodedVariableInterpreter::add_float_var(m_value); } void LogTypeDictionaryEntry::add_escape() { m_placeholder_positions.push_back(m_value.length()); - add_escape(m_value); + EncodedVariableInterpreter::add_escape(m_value); ++m_num_escaped_placeholders; } bool LogTypeDictionaryEntry::parse_next_var( - string const& msg, + std::string_view msg, size_t& var_begin_pos, size_t& var_end_pos, - string& var + std::string_view& var ) { auto last_var_end_pos = var_end_pos; // clang-format off @@ -81,21 +82,15 @@ bool LogTypeDictionaryEntry::parse_next_var( // clang-format on if (ir::get_bounds_of_next_var(msg, var_begin_pos, var_end_pos)) { // Append to log type: from end of last variable to start of current variable - auto constant = static_cast(msg).substr( - last_var_end_pos, - var_begin_pos - last_var_end_pos - ); + auto constant = msg.substr(last_var_end_pos, var_begin_pos - last_var_end_pos); ir::append_constant_to_logtype(constant, escape_handler, m_value); - var.assign(msg, var_begin_pos, var_end_pos - var_begin_pos); + var = msg.substr(var_begin_pos, var_end_pos - var_begin_pos); return true; } if (last_var_end_pos < msg.length()) { // Append to log type: from end of last variable to end - auto constant = static_cast(msg).substr( - last_var_end_pos, - msg.length() - last_var_end_pos - ); + auto constant = msg.substr(last_var_end_pos, msg.length() - last_var_end_pos); ir::append_constant_to_logtype(constant, escape_handler, m_value); } diff --git a/components/core/src/clp/LogTypeDictionaryEntry.hpp b/components/core/src/clp/LogTypeDictionaryEntry.hpp index 7cd77650f5..426797c5ad 100644 --- a/components/core/src/clp/LogTypeDictionaryEntry.hpp +++ b/components/core/src/clp/LogTypeDictionaryEntry.hpp @@ -1,6 +1,8 @@ #ifndef CLP_LOGTYPEDICTIONARYENTRY_HPP #define CLP_LOGTYPEDICTIONARYENTRY_HPP +#include +#include #include #include "Defs.h" @@ -11,7 +13,6 @@ #include "streaming_compression/zstd/Compressor.hpp" #include "streaming_compression/zstd/Decompressor.hpp" #include "TraceableException.hpp" -#include "type_utils.hpp" namespace clp { /** @@ -42,38 +43,6 @@ class LogTypeDictionaryEntry : public DictionaryEntry { LogTypeDictionaryEntry& operator=(LogTypeDictionaryEntry const&) = default; // Methods - /** - * Adds a dictionary variable placeholder to the given logtype - * @param logtype - */ - static void add_dict_var(std::string& logtype) { - logtype += enum_to_underlying_type(ir::VariablePlaceholder::Dictionary); - } - - /** - * Adds an integer variable placeholder to the given logtype - * @param logtype - */ - static void add_int_var(std::string& logtype) { - logtype += enum_to_underlying_type(ir::VariablePlaceholder::Integer); - } - - /** - * Adds a float variable placeholder to the given logtype - * @param logtype - */ - static void add_float_var(std::string& logtype) { - logtype += enum_to_underlying_type(ir::VariablePlaceholder::Float); - } - - /** - * Adds an escape character to the given logtype - * @param logtype - */ - static void add_escape(std::string& logtype) { - logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); - } - /** * @return The number of variable placeholders (including escaped ones) in the logtype. */ @@ -106,8 +75,7 @@ class LogTypeDictionaryEntry : public DictionaryEntry { * @param begin_pos Start of the constant in value_containing_constant * @param length */ - void - add_constant(std::string const& value_containing_constant, size_t begin_pos, size_t length); + void add_constant(std::string_view value_containing_constant, size_t begin_pos, size_t length); /** * Adds an int variable placeholder */ @@ -137,10 +105,10 @@ class LogTypeDictionaryEntry : public DictionaryEntry { * @return true if another variable was found, false otherwise */ bool parse_next_var( - std::string const& msg, + std::string_view msg, size_t& var_begin_pos, size_t& var_end_pos, - std::string& var + std::string_view& var ); /** diff --git a/components/core/src/clp/VariableDictionaryWriter.cpp b/components/core/src/clp/VariableDictionaryWriter.cpp index 77b0635035..abd056de98 100644 --- a/components/core/src/clp/VariableDictionaryWriter.cpp +++ b/components/core/src/clp/VariableDictionaryWriter.cpp @@ -1,10 +1,14 @@ #include "VariableDictionaryWriter.hpp" +#include +#include + +#include "Defs.h" #include "dictionary_utils.hpp" #include "spdlog_with_specializations.hpp" namespace clp { -bool VariableDictionaryWriter::add_entry(std::string const& value, variable_dictionary_id_t& id) { +bool VariableDictionaryWriter::add_entry(std::string_view value, variable_dictionary_id_t& id) { bool new_entry = false; auto const ix = m_value_to_id.find(value); @@ -23,7 +27,7 @@ bool VariableDictionaryWriter::add_entry(std::string const& value, variable_dict ++m_next_id; // Insert the ID obtained from the database into the dictionary - auto entry = VariableDictionaryEntry(value, id); + auto entry = VariableDictionaryEntry(std::string{value}, id); m_value_to_id[value] = id; new_entry = true; diff --git a/components/core/src/clp/VariableDictionaryWriter.hpp b/components/core/src/clp/VariableDictionaryWriter.hpp index 3e6384d2a8..47299499a3 100644 --- a/components/core/src/clp/VariableDictionaryWriter.hpp +++ b/components/core/src/clp/VariableDictionaryWriter.hpp @@ -1,6 +1,8 @@ #ifndef CLP_VARIABLEDICTIONARYWRITER_HPP #define CLP_VARIABLEDICTIONARYWRITER_HPP +#include + #include "Defs.h" #include "DictionaryWriter.hpp" #include "VariableDictionaryEntry.hpp" @@ -30,7 +32,7 @@ class VariableDictionaryWriter * @param value * @param id ID of the variable matching the given entry */ - bool add_entry(std::string const& value, variable_dictionary_id_t& id); + bool add_entry(std::string_view value, variable_dictionary_id_t& id); }; } // namespace clp From 066e5b435977a87649f4649183f9bd61d4684837 Mon Sep 17 00:00:00 2001 From: gibber9809 Date: Tue, 29 Jul 2025 15:07:08 +0000 Subject: [PATCH 3/9] Remove concept usage to help break up changes. --- .../src/clp/EncodedVariableInterpreter.hpp | 55 +++++++++---------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/components/core/src/clp/EncodedVariableInterpreter.hpp b/components/core/src/clp/EncodedVariableInterpreter.hpp index ce611481a3..d5be9ad8f0 100644 --- a/components/core/src/clp/EncodedVariableInterpreter.hpp +++ b/components/core/src/clp/EncodedVariableInterpreter.hpp @@ -6,7 +6,6 @@ #include #include -#include "DictionaryConcepts.hpp" #include "ffi/ir_stream/decoding_methods.hpp" #include "ir/LogEvent.hpp" #include "ir/types.hpp" @@ -123,8 +122,8 @@ class EncodedVariableInterpreter { * @param var_ids */ template < - VariableDictionaryWriterReq VariableDictionaryWriterType, - LogTypeDictionaryEntryReq LogTypeDictionaryEntryType> + typename VariableDictionaryWriterType, + typename LogTypeDictionaryEntryType> static void encode_and_add_to_dictionary( std::string_view message, LogTypeDictionaryEntryType& logtype_dict_entry, @@ -150,8 +149,8 @@ class EncodedVariableInterpreter { */ template < typename EncodedVariableType, - LogTypeDictionaryEntryReq LogTypeDictionaryEntryType, - VariableDictionaryWriterReq VariableDictionaryWriterType> + typename LogTypeDictionaryEntryType, + typename VariableDictionaryWriterType> static void encode_and_add_to_dictionary( ir::LogEvent const& log_event, LogTypeDictionaryEntryType& logtype_dict_entry, @@ -173,9 +172,9 @@ class EncodedVariableInterpreter { * @return true if successful, false otherwise */ template < - LogTypeDictionaryEntryReq LogTypeDictionaryEntryType, - VariableDictionaryReaderReq VariableDictionaryReaderType, - typename EncodedVariableVectorType> // TODO: Make a concept. + typename LogTypeDictionaryEntryType, + typename VariableDictionaryReaderType, + typename EncodedVariableVectorType> static bool decode_variables_into_message( LogTypeDictionaryEntryType const& logtype_dict_entry, VariableDictionaryReaderType const& var_dict, @@ -196,7 +195,7 @@ class EncodedVariableInterpreter { * dictionary * @return false otherwise */ - template + template static bool encode_and_search_dictionary( std::string_view var_str, VariableDictionaryReaderType const& var_dict, @@ -216,8 +215,8 @@ class EncodedVariableInterpreter { * @return true if any match found, false otherwise */ template < - VariableDictionaryReaderReq VariableDictionaryReaderType, - VariableDictionaryEntryReq VariableDictionaryEntryType + typename VariableDictionaryReaderType, + typename VariableDictionaryEntryType = VariableDictionaryReaderType::entry_t> static bool wildcard_search_dictionary_and_get_encoded_matches( std::string_view var_wildcard_str, @@ -240,8 +239,8 @@ class EncodedVariableInterpreter { * @return The encoded variable */ template < - LogTypeDictionaryEntryReq LogTypeDictionaryEntryType, - VariableDictionaryWriterReq VariableDictionaryWriterType> + typename LogTypeDictionaryEntryType, + typename VariableDictionaryWriterType> static encoded_variable_t encode_var( std::string_view var, LogTypeDictionaryEntryType& logtype_dict_entry, @@ -261,8 +260,8 @@ class EncodedVariableInterpreter { * @return The dictionary ID */ template < - LogTypeDictionaryEntryReq LogTypeDictionaryEntryType, - VariableDictionaryWriterReq VariableDictionaryWriterType> + typename LogTypeDictionaryEntryType, + typename VariableDictionaryWriterType> static variable_dictionary_id_t add_dict_var( std::string_view var, LogTypeDictionaryEntryType& logtype_dict_entry, @@ -272,8 +271,8 @@ class EncodedVariableInterpreter { }; template < - VariableDictionaryWriterReq VariableDictionaryWriterType, - LogTypeDictionaryEntryReq LogTypeDictionaryEntryType> + typename VariableDictionaryWriterType, + typename LogTypeDictionaryEntryType> void EncodedVariableInterpreter::encode_and_add_to_dictionary( std::string_view message, LogTypeDictionaryEntryType& logtype_dict_entry, @@ -297,8 +296,8 @@ void EncodedVariableInterpreter::encode_and_add_to_dictionary( template < typename EncodedVariableType, - LogTypeDictionaryEntryReq LogTypeDictionaryEntryType, - VariableDictionaryWriterReq VariableDictionaryWriterType> + typename LogTypeDictionaryEntryType, + typename VariableDictionaryWriterType> void EncodedVariableInterpreter::encode_and_add_to_dictionary( ir::LogEvent const& log_event, LogTypeDictionaryEntryType& logtype_dict_entry, @@ -370,8 +369,8 @@ void EncodedVariableInterpreter::encode_and_add_to_dictionary( } template < - LogTypeDictionaryEntryReq LogTypeDictionaryEntryType, - VariableDictionaryReaderReq VariableDictionaryReaderType, + typename LogTypeDictionaryEntryType, + typename VariableDictionaryReaderType, typename EncodedVariableVectorType> bool EncodedVariableInterpreter::decode_variables_into_message( LogTypeDictionaryEntryType const& logtype_dict_entry, @@ -444,7 +443,7 @@ bool EncodedVariableInterpreter::decode_variables_into_message( return true; } -template +template bool EncodedVariableInterpreter::encode_and_search_dictionary( std::string_view var_str, VariableDictionaryReaderType const& var_dict, @@ -493,8 +492,8 @@ bool EncodedVariableInterpreter::encode_and_search_dictionary( } template < - VariableDictionaryReaderReq VariableDictionaryReaderType, - VariableDictionaryEntryReq VariableDictionaryEntryType> + typename VariableDictionaryReaderType, + typename VariableDictionaryEntryType> bool EncodedVariableInterpreter::wildcard_search_dictionary_and_get_encoded_matches( std::string_view var_wildcard_str, VariableDictionaryReaderType const& var_dict, @@ -523,8 +522,8 @@ bool EncodedVariableInterpreter::wildcard_search_dictionary_and_get_encoded_matc } template < - LogTypeDictionaryEntryReq LogTypeDictionaryEntryType, - VariableDictionaryWriterReq VariableDictionaryWriterType> + typename LogTypeDictionaryEntryType, + typename VariableDictionaryWriterType> encoded_variable_t EncodedVariableInterpreter::encode_var( std::string_view var, LogTypeDictionaryEntryType& logtype_dict_entry, @@ -544,8 +543,8 @@ encoded_variable_t EncodedVariableInterpreter::encode_var( } template < - LogTypeDictionaryEntryReq LogTypeDictionaryEntryType, - VariableDictionaryWriterReq VariableDictionaryWriterType> + typename LogTypeDictionaryEntryType, + typename VariableDictionaryWriterType> variable_dictionary_id_t EncodedVariableInterpreter::add_dict_var( std::string_view var, LogTypeDictionaryEntryType& logtype_dict_entry, From 70ef5e39197f298c8fc31ce1d3d53205b9878c69 Mon Sep 17 00:00:00 2001 From: gibber9809 Date: Tue, 29 Jul 2025 15:23:03 +0000 Subject: [PATCH 4/9] Remove dependence on changes do Query.{cpp,hpp} in order to split up changes --- .../src/clp/EncodedVariableInterpreter.hpp | 52 +++++++------------ 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/components/core/src/clp/EncodedVariableInterpreter.hpp b/components/core/src/clp/EncodedVariableInterpreter.hpp index d5be9ad8f0..478f6559c5 100644 --- a/components/core/src/clp/EncodedVariableInterpreter.hpp +++ b/components/core/src/clp/EncodedVariableInterpreter.hpp @@ -121,9 +121,7 @@ class EncodedVariableInterpreter { * @param encoded_vars * @param var_ids */ - template < - typename VariableDictionaryWriterType, - typename LogTypeDictionaryEntryType> + template static void encode_and_add_to_dictionary( std::string_view message, LogTypeDictionaryEntryType& logtype_dict_entry, @@ -186,6 +184,7 @@ class EncodedVariableInterpreter { * Encodes a string-form variable, and if it is dictionary variable, searches for its ID in the * given variable dictionary. * @tparam VariableDictionaryReaderType + * @tparam VariableDictionaryEntryType * @param var_str * @param var_dict * @param ignore_case @@ -195,7 +194,9 @@ class EncodedVariableInterpreter { * dictionary * @return false otherwise */ - template + template < + typename VariableDictionaryReaderType, + typename VariableDictionaryEntryType = VariableDictionaryReaderType::entry_t> static bool encode_and_search_dictionary( std::string_view var_str, VariableDictionaryReaderType const& var_dict, @@ -216,8 +217,7 @@ class EncodedVariableInterpreter { */ template < typename VariableDictionaryReaderType, - typename VariableDictionaryEntryType - = VariableDictionaryReaderType::entry_t> + typename VariableDictionaryEntryType = VariableDictionaryReaderType::entry_t> static bool wildcard_search_dictionary_and_get_encoded_matches( std::string_view var_wildcard_str, VariableDictionaryReaderType const& var_dict, @@ -238,9 +238,7 @@ class EncodedVariableInterpreter { * variable) * @return The encoded variable */ - template < - typename LogTypeDictionaryEntryType, - typename VariableDictionaryWriterType> + template static encoded_variable_t encode_var( std::string_view var, LogTypeDictionaryEntryType& logtype_dict_entry, @@ -259,9 +257,7 @@ class EncodedVariableInterpreter { * @param var_ids A container to add the dictionary ID to * @return The dictionary ID */ - template < - typename LogTypeDictionaryEntryType, - typename VariableDictionaryWriterType> + template static variable_dictionary_id_t add_dict_var( std::string_view var, LogTypeDictionaryEntryType& logtype_dict_entry, @@ -270,9 +266,7 @@ class EncodedVariableInterpreter { ); }; -template < - typename VariableDictionaryWriterType, - typename LogTypeDictionaryEntryType> +template void EncodedVariableInterpreter::encode_and_add_to_dictionary( std::string_view message, LogTypeDictionaryEntryType& logtype_dict_entry, @@ -443,7 +437,7 @@ bool EncodedVariableInterpreter::decode_variables_into_message( return true; } -template +template bool EncodedVariableInterpreter::encode_and_search_dictionary( std::string_view var_str, VariableDictionaryReaderType const& var_dict, @@ -474,26 +468,26 @@ bool EncodedVariableInterpreter::encode_and_search_dictionary( if (entries.size() == 1) { auto const* entry = entries.at(0); - sub_query.add_dict_var(encode_var_dict_id(entry->get_id()), entry->get_id()); + sub_query.add_dict_var(encode_var_dict_id(entry->get_id()), entry); return true; } + std::unordered_set const entries_set{ + entries.cbegin(), + entries.cend() + }; std::unordered_set encoded_vars; - std::unordered_set var_dict_ids; encoded_vars.reserve(entries.size()); for (auto const* entry : entries) { encoded_vars.emplace(encode_var_dict_id(entry->get_id())); - var_dict_ids.emplace(entry->get_id()); } - sub_query.add_imprecise_dict_var(encoded_vars, var_dict_ids); + sub_query.add_imprecise_dict_var(encoded_vars, entries_set); } return true; } -template < - typename VariableDictionaryReaderType, - typename VariableDictionaryEntryType> +template bool EncodedVariableInterpreter::wildcard_search_dictionary_and_get_encoded_matches( std::string_view var_wildcard_str, VariableDictionaryReaderType const& var_dict, @@ -510,20 +504,16 @@ bool EncodedVariableInterpreter::wildcard_search_dictionary_and_get_encoded_matc // Encode matches std::unordered_set encoded_vars; - std::unordered_set var_dict_ids; for (auto entry : var_dict_entries) { encoded_vars.emplace(encode_var_dict_id(entry->get_id())); - var_dict_ids.emplace(entry->get_id()); } - sub_query.add_imprecise_dict_var(encoded_vars, var_dict_ids); + sub_query.add_imprecise_dict_var(encoded_vars, var_dict_entries); return true; } -template < - typename LogTypeDictionaryEntryType, - typename VariableDictionaryWriterType> +template encoded_variable_t EncodedVariableInterpreter::encode_var( std::string_view var, LogTypeDictionaryEntryType& logtype_dict_entry, @@ -542,9 +532,7 @@ encoded_variable_t EncodedVariableInterpreter::encode_var( return encoded_var; } -template < - typename LogTypeDictionaryEntryType, - typename VariableDictionaryWriterType> +template variable_dictionary_id_t EncodedVariableInterpreter::add_dict_var( std::string_view var, LogTypeDictionaryEntryType& logtype_dict_entry, From 8d27a9e6c23ba914294efa4bc31f47c2f8d30ec1 Mon Sep 17 00:00:00 2001 From: gibber9809 Date: Tue, 29 Jul 2025 15:23:48 +0000 Subject: [PATCH 5/9] Update Grep to use methods that were moved from LogTypeDictionaryEntry to EncodedVariableInterpreter --- components/core/src/clp/Grep.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/components/core/src/clp/Grep.cpp b/components/core/src/clp/Grep.cpp index e0c2caf51c..a8decb7fb3 100644 --- a/components/core/src/clp/Grep.cpp +++ b/components/core/src/clp/Grep.cpp @@ -343,11 +343,11 @@ bool process_var_token( } if (query_token.is_float_var()) { - LogTypeDictionaryEntry::add_float_var(logtype); + EncodedVariableInterpreter::add_float_var(logtype); } else if (query_token.is_int_var()) { - LogTypeDictionaryEntry::add_int_var(logtype); + EncodedVariableInterpreter::add_int_var(logtype); } else { - LogTypeDictionaryEntry::add_dict_var(logtype); + EncodedVariableInterpreter::add_dict_var(logtype); if (query_token.cannot_convert_to_non_dict_var()) { // Must be a dictionary variable, so search variable dictionary @@ -451,7 +451,7 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery( logtype += '*'; } else { logtype += '*'; - LogTypeDictionaryEntry::add_dict_var(logtype); + EncodedVariableInterpreter::add_dict_var(logtype); logtype += '*'; } } else { From 5e15f40cad3295251a7c8ec7168dddfa0e542e79 Mon Sep 17 00:00:00 2001 From: gibber9809 Date: Tue, 29 Jul 2025 15:24:24 +0000 Subject: [PATCH 6/9] Update cmake scripts to record new absl::flat_hash_map dependency --- components/core/src/clp/clg/CMakeLists.txt | 1 + components/core/src/clp/clo/CMakeLists.txt | 1 + components/core/src/clp/clp/CMakeLists.txt | 1 + 3 files changed, 3 insertions(+) diff --git a/components/core/src/clp/clg/CMakeLists.txt b/components/core/src/clp/clg/CMakeLists.txt index b8e2f1962b..816173bdb0 100644 --- a/components/core/src/clp/clg/CMakeLists.txt +++ b/components/core/src/clp/clg/CMakeLists.txt @@ -132,6 +132,7 @@ if(CLP_BUILD_EXECUTABLES) ) target_link_libraries(clg PRIVATE + absl::flat_hash_map Boost::filesystem Boost::program_options date::date fmt::fmt diff --git a/components/core/src/clp/clo/CMakeLists.txt b/components/core/src/clp/clo/CMakeLists.txt index a4cf0ddf47..94d03c79e0 100644 --- a/components/core/src/clp/clo/CMakeLists.txt +++ b/components/core/src/clp/clo/CMakeLists.txt @@ -160,6 +160,7 @@ if(CLP_BUILD_EXECUTABLES) ) target_link_libraries(clo PRIVATE + absl::flat_hash_map Boost::filesystem Boost::program_options date::date fmt::fmt diff --git a/components/core/src/clp/clp/CMakeLists.txt b/components/core/src/clp/clp/CMakeLists.txt index c9782c509f..e9b35c296e 100644 --- a/components/core/src/clp/clp/CMakeLists.txt +++ b/components/core/src/clp/clp/CMakeLists.txt @@ -174,6 +174,7 @@ if(CLP_BUILD_EXECUTABLES) ) target_link_libraries(clp PRIVATE + absl::flat_hash_map Boost::filesystem Boost::program_options date::date fmt::fmt From 7bf9a76f28e1251706faf759bbc9c205e80a9481 Mon Sep 17 00:00:00 2001 From: gibber9809 Date: Tue, 29 Jul 2025 21:54:28 +0000 Subject: [PATCH 7/9] Address review comments. --- .../src/clp/EncodedVariableInterpreter.hpp | 18 +++++++++--------- .../core/src/clp/LogTypeDictionaryEntry.hpp | 1 - 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/components/core/src/clp/EncodedVariableInterpreter.hpp b/components/core/src/clp/EncodedVariableInterpreter.hpp index 478f6559c5..085497a9f3 100644 --- a/components/core/src/clp/EncodedVariableInterpreter.hpp +++ b/components/core/src/clp/EncodedVariableInterpreter.hpp @@ -56,7 +56,7 @@ class EncodedVariableInterpreter { * @param logtype */ static void add_dict_var(std::string& logtype) { - logtype += enum_to_underlying_type(ir::VariablePlaceholder::Dictionary); + logtype.emplace_back(enum_to_underlying_type(ir::VariablePlaceholder::Dictionary)); } /** @@ -64,7 +64,7 @@ class EncodedVariableInterpreter { * @param logtype */ static void add_int_var(std::string& logtype) { - logtype += enum_to_underlying_type(ir::VariablePlaceholder::Integer); + logtype.emplace_back(enum_to_underlying_type(ir::VariablePlaceholder::Integer)); } /** @@ -72,7 +72,7 @@ class EncodedVariableInterpreter { * @param logtype */ static void add_float_var(std::string& logtype) { - logtype += enum_to_underlying_type(ir::VariablePlaceholder::Float); + logtype.emplace_back(enum_to_underlying_type(ir::VariablePlaceholder::Float)); } /** @@ -80,7 +80,7 @@ class EncodedVariableInterpreter { * @param logtype */ static void add_escape(std::string& logtype) { - logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + logtype.emplace_back(enum_to_underlying_type(ir::VariablePlaceholder::Escape)); } /** @@ -162,7 +162,7 @@ class EncodedVariableInterpreter { * Decodes all variables and decompresses them into a message * @tparam LogTypeDictionaryEntryType * @tparam VariableDictionaryReaderType - * @tparam EncodedVariableVectorType A vector of `clp::encoded_variable_t`. + * @tparam EncodedVariableContainerType A random access list of `clp::encoded_variable_t`. * @param logtype_dict_entry * @param var_dict * @param encoded_vars @@ -172,11 +172,11 @@ class EncodedVariableInterpreter { template < typename LogTypeDictionaryEntryType, typename VariableDictionaryReaderType, - typename EncodedVariableVectorType> + typename EncodedVariableContainerType> static bool decode_variables_into_message( LogTypeDictionaryEntryType const& logtype_dict_entry, VariableDictionaryReaderType const& var_dict, - EncodedVariableVectorType const& encoded_vars, + EncodedVariableContainerType const& encoded_vars, std::string& decompressed_msg ); @@ -365,11 +365,11 @@ void EncodedVariableInterpreter::encode_and_add_to_dictionary( template < typename LogTypeDictionaryEntryType, typename VariableDictionaryReaderType, - typename EncodedVariableVectorType> + typename EncodedVariableContainerType> bool EncodedVariableInterpreter::decode_variables_into_message( LogTypeDictionaryEntryType const& logtype_dict_entry, VariableDictionaryReaderType const& var_dict, - EncodedVariableVectorType const& encoded_vars, + EncodedVariableContainerType const& encoded_vars, std::string& decompressed_msg ) { // Ensure the number of variables in the logtype matches the number of encoded variables given diff --git a/components/core/src/clp/LogTypeDictionaryEntry.hpp b/components/core/src/clp/LogTypeDictionaryEntry.hpp index 426797c5ad..91488ef017 100644 --- a/components/core/src/clp/LogTypeDictionaryEntry.hpp +++ b/components/core/src/clp/LogTypeDictionaryEntry.hpp @@ -1,7 +1,6 @@ #ifndef CLP_LOGTYPEDICTIONARYENTRY_HPP #define CLP_LOGTYPEDICTIONARYENTRY_HPP -#include #include #include From 7b6012e30faa77bd267874d22e031fe1f2c17f6d Mon Sep 17 00:00:00 2001 From: gibber9809 Date: Tue, 29 Jul 2025 22:04:21 +0000 Subject: [PATCH 8/9] Fix compilation error --- components/core/src/clp/EncodedVariableInterpreter.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/components/core/src/clp/EncodedVariableInterpreter.hpp b/components/core/src/clp/EncodedVariableInterpreter.hpp index 085497a9f3..6359ae8668 100644 --- a/components/core/src/clp/EncodedVariableInterpreter.hpp +++ b/components/core/src/clp/EncodedVariableInterpreter.hpp @@ -56,7 +56,7 @@ class EncodedVariableInterpreter { * @param logtype */ static void add_dict_var(std::string& logtype) { - logtype.emplace_back(enum_to_underlying_type(ir::VariablePlaceholder::Dictionary)); + logtype.push_back(enum_to_underlying_type(ir::VariablePlaceholder::Dictionary)); } /** @@ -64,7 +64,7 @@ class EncodedVariableInterpreter { * @param logtype */ static void add_int_var(std::string& logtype) { - logtype.emplace_back(enum_to_underlying_type(ir::VariablePlaceholder::Integer)); + logtype.push_back(enum_to_underlying_type(ir::VariablePlaceholder::Integer)); } /** @@ -72,7 +72,7 @@ class EncodedVariableInterpreter { * @param logtype */ static void add_float_var(std::string& logtype) { - logtype.emplace_back(enum_to_underlying_type(ir::VariablePlaceholder::Float)); + logtype.push_back(enum_to_underlying_type(ir::VariablePlaceholder::Float)); } /** @@ -80,7 +80,7 @@ class EncodedVariableInterpreter { * @param logtype */ static void add_escape(std::string& logtype) { - logtype.emplace_back(enum_to_underlying_type(ir::VariablePlaceholder::Escape)); + logtype.push_back(enum_to_underlying_type(ir::VariablePlaceholder::Escape)); } /** From 7c7e5d815c78b5bed01115becd2851303fdc8c51 Mon Sep 17 00:00:00 2001 From: gibber9809 Date: Wed, 30 Jul 2025 16:34:21 +0000 Subject: [PATCH 9/9] Address review comments. --- components/core/src/clp/EncodedVariableInterpreter.hpp | 10 +++++----- .../core/tests/test-EncodedVariableInterpreter.cpp | 5 ++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/components/core/src/clp/EncodedVariableInterpreter.hpp b/components/core/src/clp/EncodedVariableInterpreter.hpp index 6359ae8668..9f8fb87441 100644 --- a/components/core/src/clp/EncodedVariableInterpreter.hpp +++ b/components/core/src/clp/EncodedVariableInterpreter.hpp @@ -113,15 +113,15 @@ class EncodedVariableInterpreter { /** * Parses all variables from a message (while constructing the logtype) and encodes them (adding * them to the variable dictionary if necessary) - * @tparam VariableDictionaryWriterType * @tparam LogTypeDictionaryEntryType + * @tparam VariableDictionaryWriterType * @param message * @param logtype_dict_entry * @param var_dict * @param encoded_vars * @param var_ids */ - template + template static void encode_and_add_to_dictionary( std::string_view message, LogTypeDictionaryEntryType& logtype_dict_entry, @@ -196,7 +196,7 @@ class EncodedVariableInterpreter { */ template < typename VariableDictionaryReaderType, - typename VariableDictionaryEntryType = VariableDictionaryReaderType::entry_t> + typename VariableDictionaryEntryType = typename VariableDictionaryReaderType::entry_t> static bool encode_and_search_dictionary( std::string_view var_str, VariableDictionaryReaderType const& var_dict, @@ -217,7 +217,7 @@ class EncodedVariableInterpreter { */ template < typename VariableDictionaryReaderType, - typename VariableDictionaryEntryType = VariableDictionaryReaderType::entry_t> + typename VariableDictionaryEntryType = typename VariableDictionaryReaderType::entry_t> static bool wildcard_search_dictionary_and_get_encoded_matches( std::string_view var_wildcard_str, VariableDictionaryReaderType const& var_dict, @@ -266,7 +266,7 @@ class EncodedVariableInterpreter { ); }; -template +template void EncodedVariableInterpreter::encode_and_add_to_dictionary( std::string_view message, LogTypeDictionaryEntryType& logtype_dict_entry, diff --git a/components/core/tests/test-EncodedVariableInterpreter.cpp b/components/core/tests/test-EncodedVariableInterpreter.cpp index 9b0f952358..9e746a86bb 100644 --- a/components/core/tests/test-EncodedVariableInterpreter.cpp +++ b/components/core/tests/test-EncodedVariableInterpreter.cpp @@ -412,10 +412,9 @@ TEST_CASE("EncodedVariableInterpreter", "[EncodedVariableInterpreter]") { var_dict_reader.open(std::string{cVarDictPath}, std::string{cVarSegmentIndexPath}); var_dict_reader.read_new_entries(); - REQUIRE(var_dict_reader.get_entry_matching_value(std::string{var_strs.at(0)}, true).size() + REQUIRE(var_dict_reader.get_entry_matching_value(var_strs.at(0), true).size() == var_strs.size()); - REQUIRE(var_dict_reader.get_entry_matching_value(std::string{var_strs.at(0)}, false).size() - == 1); + REQUIRE(var_dict_reader.get_entry_matching_value(var_strs.at(0), false).size() == 1); var_dict_reader.close();