diff --git a/components/core/src/clp/DictionaryReader.hpp b/components/core/src/clp/DictionaryReader.hpp index 9d49e21228..7e8f3a3a16 100644 --- a/components/core/src/clp/DictionaryReader.hpp +++ b/components/core/src/clp/DictionaryReader.hpp @@ -1,7 +1,10 @@ #ifndef CLP_DICTIONARYREADER_HPP #define CLP_DICTIONARYREADER_HPP +#include #include +#include +#include #include #include @@ -34,6 +37,9 @@ class DictionaryReader { char const* what() const noexcept override { return "DictionaryReader operation failed"; } }; + using dictionary_id_t = DictionaryIdType; + using entry_t = EntryType; + // Constructors DictionaryReader() : m_is_open(false), m_num_segments_read_from_index(0) { static_assert( @@ -85,7 +91,7 @@ class DictionaryReader { * @return a vector of matching entries, or an empty vector if no entry matches. */ std::vector - get_entry_matching_value(std::string const& search_string, bool ignore_case) const; + get_entry_matching_value(std::string_view search_string, bool ignore_case) const; /** * Gets the entries that match a given wildcard string * @param wildcard_string @@ -93,7 +99,7 @@ class DictionaryReader { * @param entries Set in which to store found entries */ void get_entries_matching_wildcard_string( - std::string const& wildcard_string, + std::string_view wildcard_string, bool ignore_case, std::unordered_set& entries ) const; @@ -235,7 +241,7 @@ DictionaryReader::get_value(DictionaryIdType id) co template std::vector DictionaryReader::get_entry_matching_value( - std::string const& search_string, + std::string_view search_string, bool ignore_case ) const { if (false == ignore_case) { @@ -252,7 +258,11 @@ DictionaryReader::get_entry_matching_value( } std::vector entries; - auto const search_string_uppercase = boost::algorithm::to_upper_copy(search_string); + std::string search_string_uppercase; + std::ignore = boost::algorithm::to_upper_copy( + std::back_inserter(search_string_uppercase), + search_string + ); for (auto const& entry : m_entries) { if (boost::algorithm::to_upper_copy(entry.get_value()) == search_string_uppercase) { entries.push_back(&entry); @@ -263,7 +273,7 @@ DictionaryReader::get_entry_matching_value( template void DictionaryReader::get_entries_matching_wildcard_string( - std::string const& wildcard_string, + std::string_view wildcard_string, bool ignore_case, std::unordered_set& entries ) const { diff --git a/components/core/src/clp/DictionaryWriter.hpp b/components/core/src/clp/DictionaryWriter.hpp index 7cac9d5aa5..280a2ae3dd 100644 --- a/components/core/src/clp/DictionaryWriter.hpp +++ b/components/core/src/clp/DictionaryWriter.hpp @@ -2,7 +2,8 @@ #define CLP_DICTIONARYWRITER_HPP #include -#include + +#include #include "ArrayBackedPosIntSet.hpp" #include "Defs.h" @@ -34,6 +35,9 @@ class DictionaryWriter { char const* what() const noexcept override { return "DictionaryWriter operation failed"; } }; + using dictionary_id_t = DictionaryIdType; + using entry_t = EntryType; + // Constructors DictionaryWriter() : m_is_open(false) {} @@ -83,7 +87,7 @@ class DictionaryWriter { protected: // Types - using value_to_id_t = std::unordered_map; + using value_to_id_t = absl::flat_hash_map; // Variables bool m_is_open; diff --git a/components/core/src/clp/EncodedVariableInterpreter.cpp b/components/core/src/clp/EncodedVariableInterpreter.cpp index 6d9bb14f12..61a976f8d7 100644 --- a/components/core/src/clp/EncodedVariableInterpreter.cpp +++ b/components/core/src/clp/EncodedVariableInterpreter.cpp @@ -2,25 +2,17 @@ #include #include -#include +#include +#include #include #include "Defs.h" -#include "ffi/ir_stream/decoding_methods.hpp" -#include "ir/LogEvent.hpp" -#include "ir/types.hpp" -#include "spdlog_with_specializations.hpp" #include "type_utils.hpp" using clp::ffi::cEightByteEncodedFloatDigitsBitMask; -using clp::ir::eight_byte_encoded_variable_t; -using clp::ir::four_byte_encoded_variable_t; -using clp::ir::LogEvent; -using clp::ir::VariablePlaceholder; using std::string; -using std::unordered_set; -using std::vector; +using std::string_view; namespace clp { variable_dictionary_id_t EncodedVariableInterpreter::decode_var_dict_id( @@ -30,7 +22,7 @@ variable_dictionary_id_t EncodedVariableInterpreter::decode_var_dict_id( } bool EncodedVariableInterpreter::convert_string_to_representable_integer_var( - string const& value, + string_view value, encoded_variable_t& encoded_var ) { size_t length = value.length(); @@ -69,7 +61,7 @@ bool EncodedVariableInterpreter::convert_string_to_representable_integer_var( } bool EncodedVariableInterpreter::convert_string_to_representable_float_var( - string const& value, + string_view value, encoded_variable_t& encoded_var ) { if (value.empty()) { @@ -204,299 +196,7 @@ void EncodedVariableInterpreter::convert_encoded_float_to_string( value[value_length - 1 - decimal_pos] = '.'; } -void EncodedVariableInterpreter::encode_and_add_to_dictionary( - string const& message, - LogTypeDictionaryEntry& logtype_dict_entry, - VariableDictionaryWriter& var_dict, - vector& encoded_vars, - vector& var_ids -) { - // Extract all variables and add to dictionary while building logtype - size_t var_begin_pos = 0; - size_t var_end_pos = 0; - string var_str; - logtype_dict_entry.clear(); - // To avoid reallocating the logtype as we append to it, reserve enough space to hold the entire - // message - logtype_dict_entry.reserve_constant_length(message.length()); - while (logtype_dict_entry.parse_next_var(message, var_begin_pos, var_end_pos, var_str)) { - auto encoded_var = encode_var(var_str, logtype_dict_entry, var_dict, var_ids); - encoded_vars.push_back(encoded_var); - } -} - -template -void EncodedVariableInterpreter::encode_and_add_to_dictionary( - LogEvent const& log_event, - LogTypeDictionaryEntry& logtype_dict_entry, - VariableDictionaryWriter& var_dict, - std::vector& encoded_vars, - std::vector& var_ids, - size_t& raw_num_bytes -) { - logtype_dict_entry.clear(); - auto const& log_message = log_event.get_message(); - logtype_dict_entry.reserve_constant_length(log_message.get_logtype().length()); - - raw_num_bytes = 0; - - auto constant_handler = [&](std::string const& value, size_t begin_pos, size_t length) { - raw_num_bytes += length; - logtype_dict_entry.add_constant(value, begin_pos, length); - }; - - auto encoded_int_handler = [&](encoded_variable_t encoded_var) { - raw_num_bytes += ffi::decode_integer_var(encoded_var).length(); - logtype_dict_entry.add_int_var(); - - eight_byte_encoded_variable_t eight_byte_encoded_var{}; - if constexpr (std::is_same_v) { - eight_byte_encoded_var = encoded_var; - } else { // std::is_same_v - eight_byte_encoded_var = ffi::encode_four_byte_integer_as_eight_byte(encoded_var); - } - encoded_vars.push_back(eight_byte_encoded_var); - }; - - auto encoded_float_handler = [&](four_byte_encoded_variable_t encoded_var) { - raw_num_bytes += ffi::decode_float_var(encoded_var).length(); - logtype_dict_entry.add_float_var(); - - eight_byte_encoded_variable_t eight_byte_encoded_var{}; - if constexpr (std::is_same_v) { - eight_byte_encoded_var = encoded_var; - } else { // std::is_same_v - eight_byte_encoded_var = ffi::encode_four_byte_float_as_eight_byte(encoded_var); - } - encoded_vars.push_back(eight_byte_encoded_var); - }; - - auto dict_var_handler = [&](string const& dict_var) { - raw_num_bytes += dict_var.length(); - - eight_byte_encoded_variable_t encoded_var{}; - if constexpr (std::is_same_v) { - encoded_var = encode_var_dict_id( - add_dict_var(dict_var, logtype_dict_entry, var_dict, var_ids) - ); - } else { // std::is_same_v - encoded_var = encode_var(dict_var, logtype_dict_entry, var_dict, var_ids); - } - encoded_vars.push_back(encoded_var); - }; - - ffi::ir_stream::generic_decode_message( - log_message.get_logtype(), - log_message.get_encoded_vars(), - log_message.get_dict_vars(), - constant_handler, - encoded_int_handler, - encoded_float_handler, - dict_var_handler - ); -} - -bool EncodedVariableInterpreter::decode_variables_into_message( - LogTypeDictionaryEntry const& logtype_dict_entry, - VariableDictionaryReader const& var_dict, - vector const& encoded_vars, - string& decompressed_msg -) { - // Ensure the number of variables in the logtype matches the number of encoded variables given - auto const& logtype_value = logtype_dict_entry.get_value(); - size_t const num_vars = logtype_dict_entry.get_num_variables(); - if (num_vars != encoded_vars.size()) { - SPDLOG_ERROR( - "EncodedVariableInterpreter: Logtype '{}' contains {} variables, but {} were given " - "for decoding.", - logtype_value.c_str(), - num_vars, - encoded_vars.size() - ); - return false; - } - - VariablePlaceholder var_placeholder; - size_t constant_begin_pos = 0; - string float_str; - variable_dictionary_id_t var_dict_id; - size_t const num_placeholders_in_logtype = logtype_dict_entry.get_num_placeholders(); - for (size_t placeholder_ix = 0, var_ix = 0; placeholder_ix < num_placeholders_in_logtype; - ++placeholder_ix) - { - size_t placeholder_position - = logtype_dict_entry.get_placeholder_info(placeholder_ix, var_placeholder); - - // Add the constant that's between the last placeholder and this one - decompressed_msg.append( - logtype_value, - constant_begin_pos, - placeholder_position - constant_begin_pos - ); - switch (var_placeholder) { - case VariablePlaceholder::Integer: - decompressed_msg += std::to_string(encoded_vars[var_ix++]); - break; - case VariablePlaceholder::Float: - convert_encoded_float_to_string(encoded_vars[var_ix++], float_str); - decompressed_msg += float_str; - break; - case VariablePlaceholder::Dictionary: - var_dict_id = decode_var_dict_id(encoded_vars[var_ix++]); - decompressed_msg += var_dict.get_value(var_dict_id); - break; - case VariablePlaceholder::Escape: - break; - default: - SPDLOG_ERROR( - "EncodedVariableInterpreter: Logtype '{}' contains unexpected variable " - "placeholder 0x{:x}", - logtype_value, - enum_to_underlying_type(var_placeholder) - ); - return false; - } - // Move past the variable placeholder - constant_begin_pos = placeholder_position + 1; - } - // Append remainder of logtype, if any - if (constant_begin_pos < logtype_value.length()) { - decompressed_msg.append(logtype_value, constant_begin_pos, string::npos); - } - - return true; -} - -bool EncodedVariableInterpreter::encode_and_search_dictionary( - string const& var_str, - VariableDictionaryReader const& var_dict, - bool ignore_case, - string& logtype, - SubQuery& sub_query -) { - size_t length = var_str.length(); - if (0 == length) { - throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); - } - - encoded_variable_t encoded_var; - if (convert_string_to_representable_integer_var(var_str, encoded_var)) { - LogTypeDictionaryEntry::add_int_var(logtype); - sub_query.add_non_dict_var(encoded_var); - } else if (convert_string_to_representable_float_var(var_str, encoded_var)) { - LogTypeDictionaryEntry::add_float_var(logtype); - sub_query.add_non_dict_var(encoded_var); - } else { - auto const entries = var_dict.get_entry_matching_value(var_str, ignore_case); - if (entries.empty()) { - // Not in dictionary - return false; - } - - LogTypeDictionaryEntry::add_dict_var(logtype); - - if (entries.size() == 1) { - auto const* entry = entries.at(0); - sub_query.add_dict_var(encode_var_dict_id(entry->get_id()), entry); - return true; - } - - std::unordered_set const entries_set{ - entries.cbegin(), - entries.cend() - }; - std::unordered_set encoded_vars; - encoded_vars.reserve(entries.size()); - for (auto const* entry : entries) { - encoded_vars.emplace(encode_var_dict_id(entry->get_id())); - } - sub_query.add_imprecise_dict_var(encoded_vars, entries_set); - } - - return true; -} - -bool EncodedVariableInterpreter::wildcard_search_dictionary_and_get_encoded_matches( - std::string const& var_wildcard_str, - VariableDictionaryReader const& var_dict, - bool ignore_case, - SubQuery& sub_query -) { - // Find matches - unordered_set var_dict_entries; - var_dict.get_entries_matching_wildcard_string(var_wildcard_str, ignore_case, var_dict_entries); - if (var_dict_entries.empty()) { - // Not in dictionary - return false; - } - - // Encode matches - unordered_set encoded_vars; - for (auto entry : var_dict_entries) { - encoded_vars.insert(encode_var_dict_id(entry->get_id())); - } - - sub_query.add_imprecise_dict_var(encoded_vars, var_dict_entries); - - return true; -} - encoded_variable_t EncodedVariableInterpreter::encode_var_dict_id(variable_dictionary_id_t id) { return bit_cast(id); } - -encoded_variable_t EncodedVariableInterpreter::encode_var( - string const& var, - LogTypeDictionaryEntry& logtype_dict_entry, - VariableDictionaryWriter& var_dict, - vector& var_ids -) { - encoded_variable_t encoded_var{0}; - if (convert_string_to_representable_integer_var(var, encoded_var)) { - logtype_dict_entry.add_int_var(); - } else if (convert_string_to_representable_float_var(var, encoded_var)) { - logtype_dict_entry.add_float_var(); - } else { - // Variable string looks like a dictionary variable, so encode it as so - encoded_var = encode_var_dict_id(add_dict_var(var, logtype_dict_entry, var_dict, var_ids)); - } - return encoded_var; -} - -variable_dictionary_id_t EncodedVariableInterpreter::add_dict_var( - string const& var, - LogTypeDictionaryEntry& logtype_dict_entry, - VariableDictionaryWriter& var_dict, - vector& var_ids -) { - variable_dictionary_id_t id{cVariableDictionaryIdMax}; - var_dict.add_entry(var, id); - var_ids.push_back(id); - - logtype_dict_entry.add_dictionary_var(); - - return id; -} - -// Explicitly declare template specializations so that we can define the template methods in this -// file -template void -EncodedVariableInterpreter::encode_and_add_to_dictionary( - LogEvent const& log_event, - LogTypeDictionaryEntry& logtype_dict_entry, - VariableDictionaryWriter& var_dict, - std::vector& encoded_vars, - std::vector& var_ids, - size_t& raw_num_bytes -); - -template void -EncodedVariableInterpreter::encode_and_add_to_dictionary( - LogEvent const& log_event, - LogTypeDictionaryEntry& logtype_dict_entry, - VariableDictionaryWriter& var_dict, - std::vector& encoded_vars, - std::vector& var_ids, - size_t& raw_num_bytes -); } // namespace clp diff --git a/components/core/src/clp/EncodedVariableInterpreter.hpp b/components/core/src/clp/EncodedVariableInterpreter.hpp index 9bb216a29a..9f8fb87441 100644 --- a/components/core/src/clp/EncodedVariableInterpreter.hpp +++ b/components/core/src/clp/EncodedVariableInterpreter.hpp @@ -2,14 +2,17 @@ #define CLP_ENCODEDVARIABLEINTERPRETER_HPP #include +#include +#include #include +#include "ffi/ir_stream/decoding_methods.hpp" #include "ir/LogEvent.hpp" #include "ir/types.hpp" #include "Query.hpp" +#include "spdlog_with_specializations.hpp" #include "TraceableException.hpp" -#include "VariableDictionaryReader.hpp" -#include "VariableDictionaryWriter.hpp" +#include "type_utils.hpp" namespace clp { /** @@ -47,6 +50,39 @@ class EncodedVariableInterpreter { // Methods static encoded_variable_t encode_var_dict_id(variable_dictionary_id_t id); static variable_dictionary_id_t decode_var_dict_id(encoded_variable_t encoded_var); + + /** + * Adds a dictionary variable placeholder to the given logtype + * @param logtype + */ + static void add_dict_var(std::string& logtype) { + logtype.push_back(enum_to_underlying_type(ir::VariablePlaceholder::Dictionary)); + } + + /** + * Adds an integer variable placeholder to the given logtype + * @param logtype + */ + static void add_int_var(std::string& logtype) { + logtype.push_back(enum_to_underlying_type(ir::VariablePlaceholder::Integer)); + } + + /** + * Adds a float variable placeholder to the given logtype + * @param logtype + */ + static void add_float_var(std::string& logtype) { + logtype.push_back(enum_to_underlying_type(ir::VariablePlaceholder::Float)); + } + + /** + * Adds an escape character to the given logtype + * @param logtype + */ + static void add_escape(std::string& logtype) { + logtype.push_back(enum_to_underlying_type(ir::VariablePlaceholder::Escape)); + } + /** * Converts the given string into a representable integer variable if possible * @param value @@ -54,7 +90,7 @@ class EncodedVariableInterpreter { * @return true if was successfully converted, false otherwise */ static bool convert_string_to_representable_integer_var( - std::string const& value, + std::string_view value, encoded_variable_t& encoded_var ); /** @@ -64,7 +100,7 @@ class EncodedVariableInterpreter { * @return true if was successfully converted, false otherwise */ static bool convert_string_to_representable_float_var( - std::string const& value, + std::string_view value, encoded_variable_t& encoded_var ); /** @@ -77,16 +113,19 @@ class EncodedVariableInterpreter { /** * Parses all variables from a message (while constructing the logtype) and encodes them (adding * them to the variable dictionary if necessary) + * @tparam LogTypeDictionaryEntryType + * @tparam VariableDictionaryWriterType * @param message * @param logtype_dict_entry * @param var_dict * @param encoded_vars * @param var_ids */ + template static void encode_and_add_to_dictionary( - std::string const& message, - LogTypeDictionaryEntry& logtype_dict_entry, - VariableDictionaryWriter& var_dict, + std::string_view message, + LogTypeDictionaryEntryType& logtype_dict_entry, + VariableDictionaryWriterType& var_dict, std::vector& encoded_vars, std::vector& var_ids ); @@ -95,7 +134,9 @@ class EncodedVariableInterpreter { * Encodes the given IR log event, constructing a logtype dictionary entry, and adding any * dictionary variables to the dictionary. NOTE: Four-byte encoded variables will be converted * to eight-byte encoded variables. - * @tparam encoded_variable_t The type of the encoded variables in the log event + * @tparam EncodedVariableType The type of the encoded variables in the log event. + * @tparam LogTypeDictionaryEntryType + * @tparam VariableDictionaryWriterType * @param log_event * @param logtype_dict_entry * @param var_dict @@ -104,11 +145,14 @@ class EncodedVariableInterpreter { * @param raw_num_bytes Returns an estimate of the number of bytes that this log event would * occupy if it was not encoded in CLP's IR */ - template + template < + typename EncodedVariableType, + typename LogTypeDictionaryEntryType, + typename VariableDictionaryWriterType> static void encode_and_add_to_dictionary( - ir::LogEvent const& log_event, - LogTypeDictionaryEntry& logtype_dict_entry, - VariableDictionaryWriter& var_dict, + ir::LogEvent const& log_event, + LogTypeDictionaryEntryType& logtype_dict_entry, + VariableDictionaryWriterType& var_dict, std::vector& encoded_vars, std::vector& var_ids, size_t& raw_num_bytes @@ -116,22 +160,31 @@ class EncodedVariableInterpreter { /** * Decodes all variables and decompresses them into a message + * @tparam LogTypeDictionaryEntryType + * @tparam VariableDictionaryReaderType + * @tparam EncodedVariableContainerType A random access list of `clp::encoded_variable_t`. * @param logtype_dict_entry * @param var_dict * @param encoded_vars * @param decompressed_msg * @return true if successful, false otherwise */ + template < + typename LogTypeDictionaryEntryType, + typename VariableDictionaryReaderType, + typename EncodedVariableContainerType> static bool decode_variables_into_message( - LogTypeDictionaryEntry const& logtype_dict_entry, - VariableDictionaryReader const& var_dict, - std::vector const& encoded_vars, + LogTypeDictionaryEntryType const& logtype_dict_entry, + VariableDictionaryReaderType const& var_dict, + EncodedVariableContainerType const& encoded_vars, std::string& decompressed_msg ); /** * Encodes a string-form variable, and if it is dictionary variable, searches for its ID in the - * given variable dictionary + * given variable dictionary. + * @tparam VariableDictionaryReaderType + * @tparam VariableDictionaryEntryType * @param var_str * @param var_dict * @param ignore_case @@ -141,25 +194,33 @@ class EncodedVariableInterpreter { * dictionary * @return false otherwise */ + template < + typename VariableDictionaryReaderType, + typename VariableDictionaryEntryType = typename VariableDictionaryReaderType::entry_t> static bool encode_and_search_dictionary( - std::string const& var_str, - VariableDictionaryReader const& var_dict, + std::string_view var_str, + VariableDictionaryReaderType const& var_dict, bool ignore_case, std::string& logtype, SubQuery& sub_query ); /** * Search for the given string-form variable in the variable dictionary, encode any matches, and - * add them to the given sub-query + * add them to the given sub-query. + * @tparam VariableDictionaryReaderType + * @tparam VariableDictionaryEntryType * @param var_wildcard_str * @param var_dict * @param ignore_case * @param sub_query * @return true if any match found, false otherwise */ + template < + typename VariableDictionaryReaderType, + typename VariableDictionaryEntryType = typename VariableDictionaryReaderType::entry_t> static bool wildcard_search_dictionary_and_get_encoded_matches( - std::string const& var_wildcard_str, - VariableDictionaryReader const& var_dict, + std::string_view var_wildcard_str, + VariableDictionaryReaderType const& var_dict, bool ignore_case, SubQuery& sub_query ); @@ -167,7 +228,9 @@ class EncodedVariableInterpreter { private: /** * Encodes the given string as a dictionary or non-dictionary variable and adds a corresponding - * placeholder to the logtype + * placeholder to the logtype. + * @tparam LogTypeDictionaryEntryType + * @tparam VariableDictionaryWriterType * @param var * @param logtype_dict_entry * @param var_dict @@ -175,29 +238,315 @@ class EncodedVariableInterpreter { * variable) * @return The encoded variable */ + template static encoded_variable_t encode_var( - std::string const& var, - LogTypeDictionaryEntry& logtype_dict_entry, - VariableDictionaryWriter& var_dict, + std::string_view var, + LogTypeDictionaryEntryType& logtype_dict_entry, + VariableDictionaryWriterType& var_dict, std::vector& var_ids ); /** * Adds the given string to the variable dictionary and adds a corresponding placeholder to - * logtype + * logtype. + * @tparam LogTypeDictionaryEntryType + * @tparam VariableDictionaryWriterType * @param var * @param logtype_dict_entry * @param var_dict * @param var_ids A container to add the dictionary ID to * @return The dictionary ID */ + template static variable_dictionary_id_t add_dict_var( - std::string const& var, - LogTypeDictionaryEntry& logtype_dict_entry, - VariableDictionaryWriter& var_dict, + std::string_view var, + LogTypeDictionaryEntryType& logtype_dict_entry, + VariableDictionaryWriterType& var_dict, std::vector& var_ids ); }; + +template +void EncodedVariableInterpreter::encode_and_add_to_dictionary( + std::string_view message, + LogTypeDictionaryEntryType& logtype_dict_entry, + VariableDictionaryWriterType& var_dict, + std::vector& encoded_vars, + std::vector& var_ids +) { + // Extract all variables and add to dictionary while building logtype + size_t var_begin_pos = 0; + size_t var_end_pos = 0; + std::string_view var_str; + logtype_dict_entry.clear(); + // To avoid reallocating the logtype as we append to it, reserve enough space to hold the entire + // message + logtype_dict_entry.reserve_constant_length(message.length()); + while (logtype_dict_entry.parse_next_var(message, var_begin_pos, var_end_pos, var_str)) { + auto encoded_var = encode_var(var_str, logtype_dict_entry, var_dict, var_ids); + encoded_vars.push_back(encoded_var); + } +} + +template < + typename EncodedVariableType, + typename LogTypeDictionaryEntryType, + typename VariableDictionaryWriterType> +void EncodedVariableInterpreter::encode_and_add_to_dictionary( + ir::LogEvent const& log_event, + LogTypeDictionaryEntryType& logtype_dict_entry, + VariableDictionaryWriterType& var_dict, + std::vector& encoded_vars, + std::vector& var_ids, + size_t& raw_num_bytes +) { + logtype_dict_entry.clear(); + auto const& log_message = log_event.get_message(); + logtype_dict_entry.reserve_constant_length(log_message.get_logtype().length()); + + raw_num_bytes = 0; + + auto constant_handler = [&](std::string const& value, size_t begin_pos, size_t length) { + raw_num_bytes += length; + logtype_dict_entry.add_constant(value, begin_pos, length); + }; + + auto encoded_int_handler = [&](EncodedVariableType encoded_var) { + raw_num_bytes += ffi::decode_integer_var(encoded_var).length(); + logtype_dict_entry.add_int_var(); + + ir::eight_byte_encoded_variable_t eight_byte_encoded_var{}; + if constexpr (std::is_same_v) { + eight_byte_encoded_var = encoded_var; + } else { // std::is_same_v + eight_byte_encoded_var = ffi::encode_four_byte_integer_as_eight_byte(encoded_var); + } + encoded_vars.push_back(eight_byte_encoded_var); + }; + + auto encoded_float_handler = [&](EncodedVariableType encoded_var) { + raw_num_bytes += ffi::decode_float_var(encoded_var).length(); + logtype_dict_entry.add_float_var(); + + ir::eight_byte_encoded_variable_t eight_byte_encoded_var{}; + if constexpr (std::is_same_v) { + eight_byte_encoded_var = encoded_var; + } else { // std::is_same_v + eight_byte_encoded_var = ffi::encode_four_byte_float_as_eight_byte(encoded_var); + } + encoded_vars.push_back(eight_byte_encoded_var); + }; + + auto dict_var_handler = [&](std::string const& dict_var) { + raw_num_bytes += dict_var.length(); + + ir::eight_byte_encoded_variable_t encoded_var{}; + if constexpr (std::is_same_v) { + encoded_var = encode_var_dict_id( + add_dict_var(dict_var, logtype_dict_entry, var_dict, var_ids) + ); + } else { // std::is_same_v + encoded_var = encode_var(dict_var, logtype_dict_entry, var_dict, var_ids); + } + encoded_vars.push_back(encoded_var); + }; + + ffi::ir_stream::generic_decode_message( + log_message.get_logtype(), + log_message.get_encoded_vars(), + log_message.get_dict_vars(), + constant_handler, + encoded_int_handler, + encoded_float_handler, + dict_var_handler + ); +} + +template < + typename LogTypeDictionaryEntryType, + typename VariableDictionaryReaderType, + typename EncodedVariableContainerType> +bool EncodedVariableInterpreter::decode_variables_into_message( + LogTypeDictionaryEntryType const& logtype_dict_entry, + VariableDictionaryReaderType const& var_dict, + EncodedVariableContainerType const& encoded_vars, + std::string& decompressed_msg +) { + // Ensure the number of variables in the logtype matches the number of encoded variables given + auto const& logtype_value = logtype_dict_entry.get_value(); + size_t const num_vars = logtype_dict_entry.get_num_variables(); + if (num_vars != encoded_vars.size()) { + SPDLOG_ERROR( + "EncodedVariableInterpreter: Logtype '{}' contains {} variables, but {} were given " + "for decoding.", + logtype_value.c_str(), + num_vars, + encoded_vars.size() + ); + return false; + } + + ir::VariablePlaceholder var_placeholder; + size_t constant_begin_pos = 0; + std::string float_str; + variable_dictionary_id_t var_dict_id; + size_t const num_placeholders_in_logtype = logtype_dict_entry.get_num_placeholders(); + for (size_t placeholder_ix = 0, var_ix = 0; placeholder_ix < num_placeholders_in_logtype; + ++placeholder_ix) + { + size_t placeholder_position + = logtype_dict_entry.get_placeholder_info(placeholder_ix, var_placeholder); + + // Add the constant that's between the last placeholder and this one + decompressed_msg.append( + logtype_value, + constant_begin_pos, + placeholder_position - constant_begin_pos + ); + switch (var_placeholder) { + case ir::VariablePlaceholder::Integer: + decompressed_msg += std::to_string(encoded_vars[var_ix++]); + break; + case ir::VariablePlaceholder::Float: + convert_encoded_float_to_string(encoded_vars[var_ix++], float_str); + decompressed_msg += float_str; + break; + case ir::VariablePlaceholder::Dictionary: + var_dict_id = decode_var_dict_id(encoded_vars[var_ix++]); + decompressed_msg += var_dict.get_value(var_dict_id); + break; + case ir::VariablePlaceholder::Escape: + break; + default: + SPDLOG_ERROR( + "EncodedVariableInterpreter: Logtype '{}' contains unexpected variable " + "placeholder 0x{:x}", + logtype_value, + enum_to_underlying_type(var_placeholder) + ); + return false; + } + // Move past the variable placeholder + constant_begin_pos = placeholder_position + 1; + } + // Append remainder of logtype, if any + if (constant_begin_pos < logtype_value.length()) { + decompressed_msg.append(logtype_value, constant_begin_pos, std::string::npos); + } + + return true; +} + +template +bool EncodedVariableInterpreter::encode_and_search_dictionary( + std::string_view var_str, + VariableDictionaryReaderType const& var_dict, + bool ignore_case, + std::string& logtype, + SubQuery& sub_query +) { + size_t length = var_str.length(); + if (0 == length) { + throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); + } + + encoded_variable_t encoded_var; + if (convert_string_to_representable_integer_var(var_str, encoded_var)) { + add_int_var(logtype); + sub_query.add_non_dict_var(encoded_var); + } else if (convert_string_to_representable_float_var(var_str, encoded_var)) { + add_float_var(logtype); + sub_query.add_non_dict_var(encoded_var); + } else { + auto const entries = var_dict.get_entry_matching_value(var_str, ignore_case); + if (entries.empty()) { + // Not in dictionary + return false; + } + + add_dict_var(logtype); + + if (entries.size() == 1) { + auto const* entry = entries.at(0); + sub_query.add_dict_var(encode_var_dict_id(entry->get_id()), entry); + return true; + } + + std::unordered_set const entries_set{ + entries.cbegin(), + entries.cend() + }; + std::unordered_set encoded_vars; + encoded_vars.reserve(entries.size()); + for (auto const* entry : entries) { + encoded_vars.emplace(encode_var_dict_id(entry->get_id())); + } + sub_query.add_imprecise_dict_var(encoded_vars, entries_set); + } + + return true; +} + +template +bool EncodedVariableInterpreter::wildcard_search_dictionary_and_get_encoded_matches( + std::string_view var_wildcard_str, + VariableDictionaryReaderType const& var_dict, + bool ignore_case, + SubQuery& sub_query +) { + // Find matches + std::unordered_set var_dict_entries; + var_dict.get_entries_matching_wildcard_string(var_wildcard_str, ignore_case, var_dict_entries); + if (var_dict_entries.empty()) { + // Not in dictionary + return false; + } + + // Encode matches + std::unordered_set encoded_vars; + for (auto entry : var_dict_entries) { + encoded_vars.emplace(encode_var_dict_id(entry->get_id())); + } + + sub_query.add_imprecise_dict_var(encoded_vars, var_dict_entries); + + return true; +} + +template +encoded_variable_t EncodedVariableInterpreter::encode_var( + std::string_view var, + LogTypeDictionaryEntryType& logtype_dict_entry, + VariableDictionaryWriterType& var_dict, + std::vector& var_ids +) { + encoded_variable_t encoded_var{0}; + if (convert_string_to_representable_integer_var(var, encoded_var)) { + logtype_dict_entry.add_int_var(); + } else if (convert_string_to_representable_float_var(var, encoded_var)) { + logtype_dict_entry.add_float_var(); + } else { + // Variable string looks like a dictionary variable, so encode it as so + encoded_var = encode_var_dict_id(add_dict_var(var, logtype_dict_entry, var_dict, var_ids)); + } + return encoded_var; +} + +template +variable_dictionary_id_t EncodedVariableInterpreter::add_dict_var( + std::string_view var, + LogTypeDictionaryEntryType& logtype_dict_entry, + VariableDictionaryWriterType& var_dict, + std::vector& var_ids +) { + variable_dictionary_id_t id{cVariableDictionaryIdMax}; + var_dict.add_entry(var, id); + var_ids.push_back(id); + + logtype_dict_entry.add_dictionary_var(); + + return id; +} } // namespace clp #endif // CLP_ENCODEDVARIABLEINTERPRETER_HPP diff --git a/components/core/src/clp/Grep.cpp b/components/core/src/clp/Grep.cpp index e0c2caf51c..a8decb7fb3 100644 --- a/components/core/src/clp/Grep.cpp +++ b/components/core/src/clp/Grep.cpp @@ -343,11 +343,11 @@ bool process_var_token( } if (query_token.is_float_var()) { - LogTypeDictionaryEntry::add_float_var(logtype); + EncodedVariableInterpreter::add_float_var(logtype); } else if (query_token.is_int_var()) { - LogTypeDictionaryEntry::add_int_var(logtype); + EncodedVariableInterpreter::add_int_var(logtype); } else { - LogTypeDictionaryEntry::add_dict_var(logtype); + EncodedVariableInterpreter::add_dict_var(logtype); if (query_token.cannot_convert_to_non_dict_var()) { // Must be a dictionary variable, so search variable dictionary @@ -451,7 +451,7 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery( logtype += '*'; } else { logtype += '*'; - LogTypeDictionaryEntry::add_dict_var(logtype); + EncodedVariableInterpreter::add_dict_var(logtype); logtype += '*'; } } else { diff --git a/components/core/src/clp/LogTypeDictionaryEntry.cpp b/components/core/src/clp/LogTypeDictionaryEntry.cpp index 62a9db7bf5..1b8784cd77 100644 --- a/components/core/src/clp/LogTypeDictionaryEntry.cpp +++ b/components/core/src/clp/LogTypeDictionaryEntry.cpp @@ -1,5 +1,6 @@ #include "LogTypeDictionaryEntry.hpp" +#include "EncodedVariableInterpreter.hpp" #include "ir/parsing.hpp" #include "ir/types.hpp" #include "type_utils.hpp" @@ -33,7 +34,7 @@ size_t LogTypeDictionaryEntry::get_data_size() const { } void LogTypeDictionaryEntry::add_constant( - string const& value_containing_constant, + std::string_view value_containing_constant, size_t begin_pos, size_t length ) { @@ -42,30 +43,30 @@ void LogTypeDictionaryEntry::add_constant( void LogTypeDictionaryEntry::add_dictionary_var() { m_placeholder_positions.push_back(m_value.length()); - add_dict_var(m_value); + EncodedVariableInterpreter::add_dict_var(m_value); } void LogTypeDictionaryEntry::add_int_var() { m_placeholder_positions.push_back(m_value.length()); - add_int_var(m_value); + EncodedVariableInterpreter::add_int_var(m_value); } void LogTypeDictionaryEntry::add_float_var() { m_placeholder_positions.push_back(m_value.length()); - add_float_var(m_value); + EncodedVariableInterpreter::add_float_var(m_value); } void LogTypeDictionaryEntry::add_escape() { m_placeholder_positions.push_back(m_value.length()); - add_escape(m_value); + EncodedVariableInterpreter::add_escape(m_value); ++m_num_escaped_placeholders; } bool LogTypeDictionaryEntry::parse_next_var( - string const& msg, + std::string_view msg, size_t& var_begin_pos, size_t& var_end_pos, - string& var + std::string_view& var ) { auto last_var_end_pos = var_end_pos; // clang-format off @@ -81,21 +82,15 @@ bool LogTypeDictionaryEntry::parse_next_var( // clang-format on if (ir::get_bounds_of_next_var(msg, var_begin_pos, var_end_pos)) { // Append to log type: from end of last variable to start of current variable - auto constant = static_cast(msg).substr( - last_var_end_pos, - var_begin_pos - last_var_end_pos - ); + auto constant = msg.substr(last_var_end_pos, var_begin_pos - last_var_end_pos); ir::append_constant_to_logtype(constant, escape_handler, m_value); - var.assign(msg, var_begin_pos, var_end_pos - var_begin_pos); + var = msg.substr(var_begin_pos, var_end_pos - var_begin_pos); return true; } if (last_var_end_pos < msg.length()) { // Append to log type: from end of last variable to end - auto constant = static_cast(msg).substr( - last_var_end_pos, - msg.length() - last_var_end_pos - ); + auto constant = msg.substr(last_var_end_pos, msg.length() - last_var_end_pos); ir::append_constant_to_logtype(constant, escape_handler, m_value); } diff --git a/components/core/src/clp/LogTypeDictionaryEntry.hpp b/components/core/src/clp/LogTypeDictionaryEntry.hpp index 7cd77650f5..91488ef017 100644 --- a/components/core/src/clp/LogTypeDictionaryEntry.hpp +++ b/components/core/src/clp/LogTypeDictionaryEntry.hpp @@ -1,6 +1,7 @@ #ifndef CLP_LOGTYPEDICTIONARYENTRY_HPP #define CLP_LOGTYPEDICTIONARYENTRY_HPP +#include #include #include "Defs.h" @@ -11,7 +12,6 @@ #include "streaming_compression/zstd/Compressor.hpp" #include "streaming_compression/zstd/Decompressor.hpp" #include "TraceableException.hpp" -#include "type_utils.hpp" namespace clp { /** @@ -42,38 +42,6 @@ class LogTypeDictionaryEntry : public DictionaryEntry { LogTypeDictionaryEntry& operator=(LogTypeDictionaryEntry const&) = default; // Methods - /** - * Adds a dictionary variable placeholder to the given logtype - * @param logtype - */ - static void add_dict_var(std::string& logtype) { - logtype += enum_to_underlying_type(ir::VariablePlaceholder::Dictionary); - } - - /** - * Adds an integer variable placeholder to the given logtype - * @param logtype - */ - static void add_int_var(std::string& logtype) { - logtype += enum_to_underlying_type(ir::VariablePlaceholder::Integer); - } - - /** - * Adds a float variable placeholder to the given logtype - * @param logtype - */ - static void add_float_var(std::string& logtype) { - logtype += enum_to_underlying_type(ir::VariablePlaceholder::Float); - } - - /** - * Adds an escape character to the given logtype - * @param logtype - */ - static void add_escape(std::string& logtype) { - logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); - } - /** * @return The number of variable placeholders (including escaped ones) in the logtype. */ @@ -106,8 +74,7 @@ class LogTypeDictionaryEntry : public DictionaryEntry { * @param begin_pos Start of the constant in value_containing_constant * @param length */ - void - add_constant(std::string const& value_containing_constant, size_t begin_pos, size_t length); + void add_constant(std::string_view value_containing_constant, size_t begin_pos, size_t length); /** * Adds an int variable placeholder */ @@ -137,10 +104,10 @@ class LogTypeDictionaryEntry : public DictionaryEntry { * @return true if another variable was found, false otherwise */ bool parse_next_var( - std::string const& msg, + std::string_view msg, size_t& var_begin_pos, size_t& var_end_pos, - std::string& var + std::string_view& var ); /** diff --git a/components/core/src/clp/VariableDictionaryWriter.cpp b/components/core/src/clp/VariableDictionaryWriter.cpp index 77b0635035..abd056de98 100644 --- a/components/core/src/clp/VariableDictionaryWriter.cpp +++ b/components/core/src/clp/VariableDictionaryWriter.cpp @@ -1,10 +1,14 @@ #include "VariableDictionaryWriter.hpp" +#include +#include + +#include "Defs.h" #include "dictionary_utils.hpp" #include "spdlog_with_specializations.hpp" namespace clp { -bool VariableDictionaryWriter::add_entry(std::string const& value, variable_dictionary_id_t& id) { +bool VariableDictionaryWriter::add_entry(std::string_view value, variable_dictionary_id_t& id) { bool new_entry = false; auto const ix = m_value_to_id.find(value); @@ -23,7 +27,7 @@ bool VariableDictionaryWriter::add_entry(std::string const& value, variable_dict ++m_next_id; // Insert the ID obtained from the database into the dictionary - auto entry = VariableDictionaryEntry(value, id); + auto entry = VariableDictionaryEntry(std::string{value}, id); m_value_to_id[value] = id; new_entry = true; diff --git a/components/core/src/clp/VariableDictionaryWriter.hpp b/components/core/src/clp/VariableDictionaryWriter.hpp index 3e6384d2a8..47299499a3 100644 --- a/components/core/src/clp/VariableDictionaryWriter.hpp +++ b/components/core/src/clp/VariableDictionaryWriter.hpp @@ -1,6 +1,8 @@ #ifndef CLP_VARIABLEDICTIONARYWRITER_HPP #define CLP_VARIABLEDICTIONARYWRITER_HPP +#include + #include "Defs.h" #include "DictionaryWriter.hpp" #include "VariableDictionaryEntry.hpp" @@ -30,7 +32,7 @@ class VariableDictionaryWriter * @param value * @param id ID of the variable matching the given entry */ - bool add_entry(std::string const& value, variable_dictionary_id_t& id); + bool add_entry(std::string_view value, variable_dictionary_id_t& id); }; } // namespace clp diff --git a/components/core/src/clp/clg/CMakeLists.txt b/components/core/src/clp/clg/CMakeLists.txt index b8e2f1962b..816173bdb0 100644 --- a/components/core/src/clp/clg/CMakeLists.txt +++ b/components/core/src/clp/clg/CMakeLists.txt @@ -132,6 +132,7 @@ if(CLP_BUILD_EXECUTABLES) ) target_link_libraries(clg PRIVATE + absl::flat_hash_map Boost::filesystem Boost::program_options date::date fmt::fmt diff --git a/components/core/src/clp/clo/CMakeLists.txt b/components/core/src/clp/clo/CMakeLists.txt index a4cf0ddf47..94d03c79e0 100644 --- a/components/core/src/clp/clo/CMakeLists.txt +++ b/components/core/src/clp/clo/CMakeLists.txt @@ -160,6 +160,7 @@ if(CLP_BUILD_EXECUTABLES) ) target_link_libraries(clo PRIVATE + absl::flat_hash_map Boost::filesystem Boost::program_options date::date fmt::fmt diff --git a/components/core/src/clp/clp/CMakeLists.txt b/components/core/src/clp/clp/CMakeLists.txt index c9782c509f..e9b35c296e 100644 --- a/components/core/src/clp/clp/CMakeLists.txt +++ b/components/core/src/clp/clp/CMakeLists.txt @@ -174,6 +174,7 @@ if(CLP_BUILD_EXECUTABLES) ) target_link_libraries(clp PRIVATE + absl::flat_hash_map Boost::filesystem Boost::program_options date::date fmt::fmt diff --git a/components/core/tests/test-EncodedVariableInterpreter.cpp b/components/core/tests/test-EncodedVariableInterpreter.cpp index 4dcc6d399e..9e746a86bb 100644 --- a/components/core/tests/test-EncodedVariableInterpreter.cpp +++ b/components/core/tests/test-EncodedVariableInterpreter.cpp @@ -4,7 +4,10 @@ #include "../src/clp/EncodedVariableInterpreter.hpp" #include "../src/clp/ir/types.hpp" +#include "../src/clp/LogTypeDictionaryEntry.hpp" #include "../src/clp/streaming_archive/Constants.hpp" +#include "../src/clp/VariableDictionaryReader.hpp" +#include "../src/clp/VariableDictionaryWriter.hpp" using clp::cVariableDictionaryIdMax; using clp::encoded_variable_t; @@ -409,10 +412,9 @@ TEST_CASE("EncodedVariableInterpreter", "[EncodedVariableInterpreter]") { var_dict_reader.open(std::string{cVarDictPath}, std::string{cVarSegmentIndexPath}); var_dict_reader.read_new_entries(); - REQUIRE(var_dict_reader.get_entry_matching_value(std::string{var_strs.at(0)}, true).size() + REQUIRE(var_dict_reader.get_entry_matching_value(var_strs.at(0), true).size() == var_strs.size()); - REQUIRE(var_dict_reader.get_entry_matching_value(std::string{var_strs.at(0)}, false).size() - == 1); + REQUIRE(var_dict_reader.get_entry_matching_value(var_strs.at(0), false).size() == 1); var_dict_reader.close();