diff --git a/components/core/config/schemas.txt b/components/core/config/schemas.txt
index e0b777859d..9c09fb5d45 100644
--- a/components/core/config/schemas.txt
+++ b/components/core/config/schemas.txt
@@ -16,4 +16,4 @@ float:\-{0,1}[0-9]+\.[0-9]+
 // Dictionary variables
 hex:[a-fA-F]+
 hasNumber:.*\d.*
-equals:.*=.*[a-zA-Z0-9].*
+equals:.*=(?<value>.*[a-zA-Z0-9].*)
diff --git a/components/core/src/clp/GrepCore.cpp b/components/core/src/clp/GrepCore.cpp
index 1a4bf499e2..b8abf5a980 100644
--- a/components/core/src/clp/GrepCore.cpp
+++ b/components/core/src/clp/GrepCore.cpp
@@ -257,9 +257,9 @@ bool GrepCore::get_bounds_of_next_potential_var(
             return false;
         }
         search_token = SearchToken{token.value()};
-        search_token.m_type_ids_set.insert(search_token.m_type_ids_ptr->at(0));
+        search_token.m_type_ids_set.insert(search_token.get_type_ids()->at(0));
     }
-    auto const& type = search_token.m_type_ids_ptr->at(0);
+    auto const& type = search_token.get_type_ids()->at(0);
     if (type != static_cast<uint32_t>(log_surgeon::SymbolId::TokenUncaughtString)
         && type != static_cast<uint32_t>(log_surgeon::SymbolId::TokenEnd))
     {
diff --git a/components/core/src/clp/Utils.cpp b/components/core/src/clp/Utils.cpp
index 5a4dc87b6f..dfb753fbd1 100644
--- a/components/core/src/clp/Utils.cpp
+++ b/components/core/src/clp/Utils.cpp
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include <stdexcept>
 #include
 #include
 
@@ -187,6 +188,18 @@ load_lexer_from_file(std::string const& schema_file_path, log_surgeon::lexers::B
     for (std::unique_ptr<ParserAST> const& parser_ast : schema_ast->m_schema_vars) {
         auto* rule = dynamic_cast<SchemaVarAST*>(parser_ast.get());
 
+        // Currently, we only support at most a single capture group in each variable. If a
+        // capture group is present, its match will be treated as the variable rather than the
+        // full match.
+        auto const num_captures = rule->m_regex_ptr->get_subtree_positive_captures().size();
+        if (1 < num_captures) {
+            throw std::runtime_error(
+                    schema_file_path + ":" + std::to_string(rule->m_line_num + 1)
+                    + ": error: the schema rule '" + rule->m_name
+                    + "' has a regex pattern containing > 1 capture groups (found "
+                    + std::to_string(num_captures) + ").\n"
+            );
+        }
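+        // For example, the single-capture test rule `capture:[A-Za-z]+(?<group>\d+)` passes
+        // this check, while the two-capture test rule 'multicapture' (see
+        // tests/test_schema_files/multiple_capture_groups.txt) triggers the error above.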
+
         if ("timestamp" == rule->m_name) {
             continue;
         }
diff --git a/components/core/src/clp/streaming_archive/writer/Archive.cpp b/components/core/src/clp/streaming_archive/writer/Archive.cpp
index 3b8df0d73a..dcc191f5bd 100644
--- a/components/core/src/clp/streaming_archive/writer/Archive.cpp
+++ b/components/core/src/clp/streaming_archive/writer/Archive.cpp
@@ -2,16 +2,20 @@
 
 #include
 
+#include
 #include
-#include
-#include
+#include
+#include
+#include
 #include
 #include
 #include
 #include
+#include
 #include
 #include
+#include
 #include
 
 #include "../../EncodedVariableInterpreter.hpp"
@@ -19,15 +23,12 @@
 #include "../../spdlog_with_specializations.hpp"
 #include "../../Utils.hpp"
 #include "../Constants.hpp"
+#include "TimestampPattern.hpp"
 #include "utils.hpp"
 
 using clp::ir::eight_byte_encoded_variable_t;
 using clp::ir::four_byte_encoded_variable_t;
-using log_surgeon::LogEventView;
-using std::list;
-using std::make_unique;
 using std::string;
-using std::unordered_set;
 using std::vector;
 
 namespace clp::streaming_archive::writer {
@@ -315,15 +316,124 @@ Archive::write_msg(epochtime_t timestamp, string const& message, size_t num_unco
     update_segment_indices(logtype_id, var_ids);
 }
 
-void Archive::write_msg_using_schema(LogEventView const& log_view) {
-    epochtime_t timestamp = 0;
-    TimestampPattern* timestamp_pattern = nullptr;
-    auto const& log_output_buffer = log_view.get_log_output_buffer();
-    if (log_output_buffer->has_timestamp()) {
-        size_t start;
-        size_t end;
-        timestamp_pattern = (TimestampPattern*)TimestampPattern::search_known_ts_patterns(
-                log_output_buffer->get_mutable_token(0).to_string(),
+auto Archive::add_token_to_dicts(
+        log_surgeon::LogEventView const& log_view,
+        log_surgeon::Token token_view
+) -> void {
+    auto const* type_ids{token_view.get_type_ids()};
+    if (nullptr == type_ids || type_ids->empty()) {
+        throw std::runtime_error("Token has no type IDs: " + token_view.to_string());
+    }
+    auto const token_type{type_ids->at(0)};
+    switch (token_type) {
+        case static_cast<uint32_t>(log_surgeon::SymbolId::TokenNewline):
+        case static_cast<uint32_t>(log_surgeon::SymbolId::TokenUncaughtString): {
+            m_logtype_dict_entry.add_constant(token_view.to_string(), 0, token_view.get_length());
+            break;
+        }
+        case static_cast<uint32_t>(log_surgeon::SymbolId::TokenInt): {
+            encoded_variable_t encoded_var{};
+            if (false
+                == EncodedVariableInterpreter::convert_string_to_representable_integer_var(
+                        token_view.to_string(),
+                        encoded_var
+                ))
+            {
+                variable_dictionary_id_t id{};
+                m_var_dict.add_entry(token_view.to_string(), id);
+                encoded_var = EncodedVariableInterpreter::encode_var_dict_id(id);
+                m_logtype_dict_entry.add_dictionary_var();
+            } else {
+                m_logtype_dict_entry.add_int_var();
+            }
+            m_encoded_vars.push_back(encoded_var);
+            break;
+        }
+        case static_cast<uint32_t>(log_surgeon::SymbolId::TokenFloat): {
+            encoded_variable_t encoded_var{};
+            if (false
+                == EncodedVariableInterpreter::convert_string_to_representable_float_var(
+                        token_view.to_string(),
+                        encoded_var
+                ))
+            {
+                variable_dictionary_id_t id{};
+                m_var_dict.add_entry(token_view.to_string(), id);
+                encoded_var = EncodedVariableInterpreter::encode_var_dict_id(id);
+                m_logtype_dict_entry.add_dictionary_var();
+            } else {
+                m_logtype_dict_entry.add_float_var();
+            }
+            m_encoded_vars.push_back(encoded_var);
+            break;
+        }
+        default: {
+            // If there are no capture groups, the entire variable token is stored as a
+            // variable. If the variable token contains capture groups, we break the token up:
+            // each capture is stored as a variable, and any substrings surrounding the capture
+            // are stored as part of the logtype. Capture repetition is currently unsupported,
+            // so we explicitly store only the first capture.
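+            // For example, under the test rule `capture:[A-Za-z]+(?<group>\d+)`, the token
+            // "MyDog123" contributes the constant "MyDog" to the logtype and stores "123" as
+            // a dictionary variable.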
+
+            auto const& lexer{log_view.get_log_parser().m_lexer};
+            auto capture_ids{lexer.get_capture_ids_from_rule_id(token_type)};
+            if (false == capture_ids.has_value()) {
+                variable_dictionary_id_t id{};
+                m_var_dict.add_entry(token_view.to_string(), id);
+                m_var_ids.push_back(id);
+                m_encoded_vars.push_back(EncodedVariableInterpreter::encode_var_dict_id(id));
+                m_logtype_dict_entry.add_dictionary_var();
+                break;
+            }
+
+            auto const register_ids{lexer.get_reg_ids_from_capture_id(capture_ids.value().at(0))};
+            if (false == register_ids.has_value()) {
+                throw(std::runtime_error(
+                        "No register IDs found for variable's capture group. Full token: "
+                        + token_view.to_string()
+                ));
+            }
+
+            auto const [start_reg_id, end_reg_id]{register_ids.value()};
+            auto const start_positions{token_view.get_reversed_reg_positions(start_reg_id)};
+            auto const end_positions{token_view.get_reversed_reg_positions(end_reg_id)};
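+
+            // Only split the token when both capture-boundary registers hold a valid
+            // (non-negative) position; otherwise the capture did not participate in the match
+            // and the token contributes nothing below.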
+            if (false == start_positions.empty() && -1 < start_positions[0]
+                && false == end_positions.empty() && -1 < end_positions[0])
+            {
+                auto token_end{token_view.get_end_pos()};
+
+                token_view.set_end_pos(start_positions[0]);
+                auto const before_capture{token_view.to_string_view()};
+                m_logtype_dict_entry.add_constant(before_capture, 0, before_capture.size());
+
+                token_view.set_start_pos(start_positions[0]);
+                token_view.set_end_pos(end_positions[0]);
+
+                variable_dictionary_id_t id{};
+                m_var_dict.add_entry(token_view.to_string_view(), id);
+                m_var_ids.push_back(id);
+                m_encoded_vars.push_back(EncodedVariableInterpreter::encode_var_dict_id(id));
+                m_logtype_dict_entry.add_dictionary_var();
+
+                token_view.set_start_pos(end_positions[0]);
+                token_view.set_end_pos(token_end);
+                auto const after_capture{token_view.to_string_view()};
+                m_logtype_dict_entry.add_constant(after_capture, 0, after_capture.size());
+            }
+            break;
+        }
+    }
+}
+
+void Archive::write_msg_using_schema(log_surgeon::LogEventView const& log_view) {
+    epochtime_t timestamp{0};
+    TimestampPattern const* timestamp_pattern{nullptr};
+    auto const& log_buf = log_view.get_log_output_buffer();
+    if (log_buf->has_timestamp()) {
+        size_t start{};
+        size_t end{};
+        timestamp_pattern = TimestampPattern::search_known_ts_patterns(
+                log_buf->get_mutable_token(0).to_string(),
                 timestamp,
                 start,
                 end
@@ -331,14 +441,16 @@ void Archive::write_msg_using_schema(LogEventView const& log_view) {
         if (nullptr == timestamp_pattern) {
             throw(std::runtime_error(
                     "Schema contains a timestamp regex that matches "
-                    + log_output_buffer->get_mutable_token(0).to_string()
+                    + log_buf->get_mutable_token(0).to_string()
                    + " which does not match any known timestamp pattern."
             ));
         }
         if (m_old_ts_pattern != timestamp_pattern) {
             change_ts_pattern(timestamp_pattern);
-            m_old_ts_pattern = timestamp_pattern;
+            m_old_ts_pattern = const_cast<TimestampPattern*>(timestamp_pattern);
         }
+    } else {
+        timestamp_pattern = nullptr;
     }
     if (get_data_size_of_dictionaries() >= m_target_data_size_of_dicts) {
         split_file_and_archive(
@@ -354,89 +466,37 @@
     m_encoded_vars.clear();
     m_var_ids.clear();
     m_logtype_dict_entry.clear();
-    size_t num_uncompressed_bytes = 0;
+
+    size_t num_uncompressed_bytes{0};
     // Timestamp is included in the uncompressed message size
-    uint32_t start_pos = log_output_buffer->get_token(0).m_start_pos;
+    auto start_pos{log_buf->get_token(0).get_start_pos()};
     if (timestamp_pattern == nullptr) {
-        start_pos = log_output_buffer->get_token(1).m_start_pos;
+        start_pos = log_buf->get_token(1).get_start_pos();
     }
-    uint32_t end_pos = log_output_buffer->get_token(log_output_buffer->pos() - 1).m_end_pos;
+    auto const end_pos{log_buf->get_token(log_buf->pos() - 1).get_end_pos()};
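+    // The parser reads the log through a circular buffer, so a message can wrap around the
+    // buffer's end; in that case (start_pos > end_pos) the size is the tail of the buffer
+    // plus the wrapped-around head.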
     if (start_pos <= end_pos) {
         num_uncompressed_bytes = end_pos - start_pos;
     } else {
-        num_uncompressed_bytes
-                = log_output_buffer->get_token(0).m_buffer_size - start_pos + end_pos;
+        num_uncompressed_bytes = log_buf->get_token(0).get_buffer_size() - start_pos + end_pos;
     }
 
-    for (uint32_t i = 1; i < log_output_buffer->pos(); i++) {
-        log_surgeon::Token& token = log_output_buffer->get_mutable_token(i);
-        int token_type = token.m_type_ids_ptr->at(0);
-        if (log_output_buffer->has_delimiters() && (timestamp_pattern != nullptr || i > 1)
+    for (auto token_idx{1}; token_idx < log_buf->pos(); token_idx++) {
+        auto token_view{log_buf->get_token(token_idx)};
+        auto const* type_ids{token_view.get_type_ids()};
+        if (nullptr == type_ids || type_ids->empty()) {
+            throw std::runtime_error("Token has no type IDs: " + token_view.to_string());
+        }
+        auto const token_type{type_ids->at(0)};
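+        // When a token is preceded by a delimiter, add that delimiter to the logtype as a
+        // constant, then trim it off the token before encoding the token itself.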
+        if (log_buf->has_delimiters() && (timestamp_pattern != nullptr || token_idx > 1)
             && token_type != static_cast<uint32_t>(log_surgeon::SymbolId::TokenUncaughtString)
             && token_type != static_cast<uint32_t>(log_surgeon::SymbolId::TokenNewline))
         {
-            m_logtype_dict_entry.add_constant(token.get_delimiter(), 0, 1);
-            if (token.m_start_pos == token.m_buffer_size - 1) {
-                token.m_start_pos = 0;
-            } else {
-                token.m_start_pos++;
-            }
-        }
-        switch (token_type) {
-            case static_cast<uint32_t>(log_surgeon::SymbolId::TokenNewline):
-            case static_cast<uint32_t>(log_surgeon::SymbolId::TokenUncaughtString): {
-                m_logtype_dict_entry.add_constant(token.to_string(), 0, token.get_length());
-                break;
-            }
-            case static_cast<uint32_t>(log_surgeon::SymbolId::TokenInt): {
-                encoded_variable_t encoded_var;
-                if (!EncodedVariableInterpreter::convert_string_to_representable_integer_var(
-                            token.to_string(),
-                            encoded_var
-                    ))
-                {
-                    variable_dictionary_id_t id;
-                    m_var_dict.add_entry(token.to_string(), id);
-                    encoded_var = EncodedVariableInterpreter::encode_var_dict_id(id);
-                    m_logtype_dict_entry.add_dictionary_var();
-                } else {
-                    m_logtype_dict_entry.add_int_var();
-                }
-                m_encoded_vars.push_back(encoded_var);
-                break;
-            }
-            case static_cast<uint32_t>(log_surgeon::SymbolId::TokenFloat): {
-                encoded_variable_t encoded_var;
-                if (!EncodedVariableInterpreter::convert_string_to_representable_float_var(
-                            token.to_string(),
-                            encoded_var
-                    ))
-                {
-                    variable_dictionary_id_t id;
-                    m_var_dict.add_entry(token.to_string(), id);
-                    encoded_var = EncodedVariableInterpreter::encode_var_dict_id(id);
-                    m_logtype_dict_entry.add_dictionary_var();
-                } else {
-                    m_logtype_dict_entry.add_float_var();
-                }
-                m_encoded_vars.push_back(encoded_var);
-                break;
-            }
-            default: {
-                // Variable string looks like a dictionary variable, so encode it as so
-                encoded_variable_t encoded_var;
-                variable_dictionary_id_t id;
-                m_var_dict.add_entry(token.to_string(), id);
-                encoded_var = EncodedVariableInterpreter::encode_var_dict_id(id);
-                m_var_ids.push_back(id);
-
-                m_logtype_dict_entry.add_dictionary_var();
-                m_encoded_vars.push_back(encoded_var);
-                break;
-            }
+            m_logtype_dict_entry.add_constant(token_view.get_delimiter(), 0, 1);
+            token_view.increment_start_pos();
         }
+        add_token_to_dicts(log_view, token_view);
     }
-    if (!m_logtype_dict_entry.get_value().empty()) {
-        logtype_dictionary_id_t logtype_id;
+    if (false == m_logtype_dict_entry.get_value().empty()) {
+        logtype_dictionary_id_t logtype_id{};
         m_logtype_dict.add_entry(m_logtype_dict_entry, logtype_id);
         m_file->write_encoded_msg(
                 timestamp,
diff --git a/components/core/src/clp/streaming_archive/writer/Archive.hpp b/components/core/src/clp/streaming_archive/writer/Archive.hpp
index 2b589881a5..b1351199a2 100644
--- a/components/core/src/clp/streaming_archive/writer/Archive.hpp
+++ b/components/core/src/clp/streaming_archive/writer/Archive.hpp
@@ -12,7 +12,6 @@
 #include
 #include
 #include
-#include
 
 #include "../../ArrayBackedPosIntSet.hpp"
 #include "../../ErrorCode.hpp"
@@ -150,7 +149,7 @@ class Archive {
      * @param log_event_view
      * @throw FileWriter::OperationFailed if any write fails
      */
-    void write_msg_using_schema(log_surgeon::LogEventView const& log_event_view);
+    auto write_msg_using_schema(log_surgeon::LogEventView const& log_view) -> void;
 
     /**
      * Writes an IR log event to the current encoded file
@@ -290,6 +289,15 @@
      */
     auto update_global_metadata() -> void;
 
+    /**
+     * Inspects a log-surgeon token and adds its information to the logtype and variable
+     * dictionaries.
+     * @param log_view The log event containing the token.
+     * @param token_view The token to add to the dictionaries.
+     */
+    auto
+    add_token_to_dicts(log_surgeon::LogEventView const& log_view, log_surgeon::Token token_view)
+            -> void;
+
     // Variables
     boost::uuids::uuid m_id;
     std::string m_id_as_string;
diff --git a/components/core/tests/test-ParserWithUserSchema.cpp b/components/core/tests/test-ParserWithUserSchema.cpp
index 3ab9bfad72..b51d212f9a 100644
--- a/components/core/tests/test-ParserWithUserSchema.cpp
+++ b/components/core/tests/test-ParserWithUserSchema.cpp
@@ -1,37 +1,49 @@
 // TODO: move this test to log_surgeon
 // TODO: move load_lexer_from_file into SearchParser in log_surgeon
-#include
-
 #include
 #include
+#include
 #include
+#include
 #include
 #include
+#include
+#include
 #include
 #include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
 
-#include "../src/clp/clp/run.hpp"
-#include "../src/clp/GlobalMySQLMetadataDB.hpp"
-#include "../src/clp/LogSurgeonReader.hpp"
-#include "../src/clp/Utils.hpp"
+#include "TestOutputCleaner.hpp"
 
-using clp::FileReader;
 using clp::load_lexer_from_file;
-using clp::LogSurgeonReader;
-using log_surgeon::DelimiterStringAST;
 using log_surgeon::lexers::ByteLexer;
 using log_surgeon::LogParser;
-using log_surgeon::ParserAST;
 using log_surgeon::SchemaAST;
-using log_surgeon::SchemaVarAST;
-using log_surgeon::Token;
 
 namespace {
+constexpr std::string_view cTestArchiveDirectory{"test-parser-with-user-schema-archive"};
+
+auto run_clp_compress(
+        std::filesystem::path const& schema_path,
+        std::filesystem::path const& output_path,
+        std::filesystem::path const& input_path
+) -> int;
 [[nodiscard]] auto get_tests_dir() -> std::filesystem::path;
 [[nodiscard]] auto get_test_schema_files_dir() -> std::filesystem::path;
 [[nodiscard]] auto get_test_queries_dir() -> std::filesystem::path;
+[[nodiscard]] auto get_test_log_dir() -> std::filesystem::path;
 
 auto get_tests_dir() -> std::filesystem::path {
     std::filesystem::path const current_file_path{__FILE__};
@@ -45,6 +57,30 @@ auto get_test_schema_files_dir() -> std::filesystem::path {
 auto get_test_queries_dir() -> std::filesystem::path {
     return get_tests_dir() / "test_search_queries";
 }
+
+auto get_test_log_dir() -> std::filesystem::path {
+    return get_tests_dir() / "test_log_files";
+}
+
+auto run_clp_compress(
+        std::filesystem::path const& schema_path,
+        std::filesystem::path const& output_path,
+        std::filesystem::path const& input_path
+) -> int {
+    auto const schema_path_str{schema_path.string()};
+    auto const output_path_str{output_path.string()};
+    auto const input_path_str{input_path.string()};
+    std::vector<char const*> argv{
+            "clp",
+            "c",
+            "--schema-path",
+            schema_path_str.data(),
+            output_path_str.data(),
+            input_path_str.data(),
+            nullptr
+    };
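+    // argv mirrors a C-style argument vector: the trailing nullptr matches the argv[argc] ==
+    // nullptr guarantee of main(), and it is excluded from the argument count passed below.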
+    return clp::clp::run(static_cast<int>(argv.size() - 1), argv.data());
+}
 }  // namespace
 
 std::unique_ptr<SchemaAST> generate_schema_ast(std::string const& schema_file) {
@@ -60,43 +96,6 @@ std::unique_ptr<LogParser> generate_log_parser(std::string const& schema_file) {
     return log_parser;
 }
 
-void compress(
-        std::string const& output_dir,
-        std::string const& file_to_compress,
-        std::string schema_file,
-        bool old = false
-) {
-    std::vector<std::string> arguments;
-    if (old) {
-        arguments = {"main.cpp", "c", output_dir, file_to_compress};
-    } else {
-        arguments
-                = {"main.cpp",
-                   "c",
-                   output_dir,
-                   file_to_compress,
-                   "--schema-path",
-                   std::move(schema_file)};
-    }
-    std::vector<char const*> argv;
-    for (auto const& arg : arguments) {
-        argv.push_back(arg.data());
-    }
-    argv.push_back(nullptr);
-    clp::clp::run(argv.size() - 1, argv.data());
-}
-
-void decompress(std::string archive_dir, std::string output_dir) {
-    std::vector<std::string> arguments
-            = {"main.cpp", "x", std::move(archive_dir), std::move(output_dir)};
-    std::vector<char const*> argv;
-    for (auto const& arg : arguments) {
-        argv.push_back(arg.data());
-    }
-    argv.push_back(nullptr);
-    clp::clp::run(argv.size() - 1, argv.data());
-}
-
 TEST_CASE("Test error for missing schema file", "[LALR1Parser][SchemaParser]") {
     auto const file_path = get_test_schema_files_dir() / "missing_schema.txt";
     auto const file_path_string = file_path.string();
@@ -156,49 +155,86 @@ TEST_CASE("Test creating log parser without delimiters", "[LALR1Parser][LogParse
     );
 }
 
-// TODO: This test doesn't currently work because delimiters are allowed in
-// schema files, and there is no option to disable this yet
-// TEST_CASE("Test error for creating log file with delimiter in regex pattern",
-//         "[LALR1Parser]SchemaParser]") {
-//     std::string file_path = "../tests/test_schema_files/schema_with_delimiter_in_regex_error.txt";
-//     std::string file_name = boost::filesystem::canonical(file_path).string();
-//     REQUIRE_THROWS_WITH(generate_log_parser(file_path),
-//                         file_name +
-//                         ":2: error: 'equals' has regex pattern which contains delimiter '='.\n"
-//                         + "          equals:.*=.*\n"
-//                         + "                 ^^^^^\n");
-// }
-
-// TODO: This error check is performed correctly by CLP, but it is handled by
-// something different now so this test will fail as is
-// TEST_CASE("Test error for missing log file", "[LALR1Parser][LogParser]") {
-//     std::string file_name = "../tests/test_log_files/missing_log.txt";
-//     std::string file_path = boost::filesystem::weakly_canonical(file_name).string();
-//     REQUIRE_THROWS(compress("../tests/test_archives", file_name,
-//                             "../tests/test_schema_files/schema_that_does_not_exist.txt"),
-//                    "Specified schema file does not exist.");
-// }
-
 TEST_CASE("Test lexer", "[Search]") {
     ByteLexer lexer;
     auto const schema_file_path = get_test_schema_files_dir() / "search_schema.txt";
     load_lexer_from_file(schema_file_path.string(), lexer);
     auto const query_file_path = get_test_queries_dir() / "easy.txt";
-    FileReader file_reader{query_file_path.string()};
-    LogSurgeonReader reader_wrapper(file_reader);
+    clp::FileReader file_reader{query_file_path.string()};
+    clp::LogSurgeonReader reader_wrapper(file_reader);
     log_surgeon::ParserInputBuffer parser_input_buffer;
     parser_input_buffer.read_if_safe(reader_wrapper);
     lexer.reset();
     auto [error_code, opt_token] = lexer.scan(parser_input_buffer);
     REQUIRE(error_code == log_surgeon::ErrorCode::Success);
-    Token token{opt_token.value()};
-    while (token.m_type_ids_ptr->at(0) != static_cast<uint32_t>(log_surgeon::SymbolId::TokenEnd)) {
+    auto token{opt_token.value()};
+    while (token.get_type_ids()->at(0) != static_cast<uint32_t>(log_surgeon::SymbolId::TokenEnd)) {
         SPDLOG_INFO("token:" + token.to_string() + "\n");
         SPDLOG_INFO(
-                "token.m_type_ids->back():" + lexer.m_id_symbol[token.m_type_ids_ptr->back()] + "\n"
+                "token.get_type_ids()->back():" + lexer.m_id_symbol[token.get_type_ids()->back()]
+                + "\n"
         );
         auto [error_code, opt_token] = lexer.scan(parser_input_buffer);
         REQUIRE(error_code == log_surgeon::ErrorCode::Success);
         token = opt_token.value();
     }
 }
+
+TEST_CASE("Test schema with single capture group", "[load_lexer]") {
+    auto const schema_file_path{get_test_schema_files_dir() / "single_capture_group.txt"};
+    ByteLexer lexer;
+    load_lexer_from_file(schema_file_path, lexer);
+
+    auto const rule_id{lexer.m_symbol_id.at("capture")};
+    auto const capture_ids{lexer.get_capture_ids_from_rule_id(rule_id)};
+    REQUIRE(capture_ids.has_value());
+    REQUIRE(1 == capture_ids->size());
+    REQUIRE("group" == lexer.m_id_symbol.at(capture_ids->at(0)));
+}
+
+TEST_CASE("Error on schema rule with multiple capture groups", "[load_lexer]") {
+    auto const schema_file_path{get_test_schema_files_dir() / "multiple_capture_groups.txt"};
+    ByteLexer lexer;
+    REQUIRE_THROWS_WITH(
+            load_lexer_from_file(schema_file_path, lexer),
+            schema_file_path.string()
+                    + ":3: error: the schema rule 'multicapture' has a regex pattern containing > "
+                      "1 capture groups (found 2).\n"
+    );
+}
+
+TEST_CASE("Verify dictionary contents", "[Compression]") {
+    auto const log_file_path{get_test_log_dir() / "log_with_capture.txt"};
+    auto const schema_file_path{get_test_schema_files_dir() / "single_capture_group.txt"};
+    TestOutputCleaner const cleaner{{std::string{cTestArchiveDirectory}}};
+    std::filesystem::create_directory(cTestArchiveDirectory);
+
+    REQUIRE(0 == run_clp_compress(schema_file_path, cTestArchiveDirectory, log_file_path));
+
+    std::vector<std::filesystem::path> archive_paths;
+    for (auto const& entry : std::filesystem::directory_iterator{cTestArchiveDirectory}) {
+        auto const& path{entry.path()};
+        if (false == path.string().ends_with(clp::streaming_archive::cMetadataDBFileName)) {
+            archive_paths.emplace_back(path);
+        }
+    }
+    REQUIRE(1 == archive_paths.size());
+
+    clp::streaming_archive::reader::Archive archive_reader;
+    archive_reader.open(archive_paths.at(0));
+    archive_reader.refresh_dictionaries();
+
+    auto const& logtype_dict{archive_reader.get_logtype_dictionary()};
+    REQUIRE(1 == logtype_dict.get_entries().size());
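+    // Each capture's match ("123" and "4123") is replaced in the logtype by a dictionary-
+    // variable placeholder, while the text surrounding each capture ("MyDog" and "APet")
+    // remains part of the logtype itself.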
+    REQUIRE(fmt::format(
+                    "2016-05-08 07:34:05.251 MyDog{} APet{} test.txt\n",
+                    clp::enum_to_underlying_type(clp::ir::VariablePlaceholder::Dictionary),
+                    clp::enum_to_underlying_type(clp::ir::VariablePlaceholder::Dictionary)
+            )
+            == logtype_dict.get_value(0));
+
+    auto const& var_dict{archive_reader.get_var_dictionary()};
+    REQUIRE(2 == var_dict.get_entries().size());
+    REQUIRE("123" == var_dict.get_value(0));
+    REQUIRE("4123" == var_dict.get_value(1));
+}
diff --git a/components/core/tests/test_log_files/log_with_capture.txt b/components/core/tests/test_log_files/log_with_capture.txt
new file mode 100644
index 0000000000..6210ea8bdc
--- /dev/null
+++ b/components/core/tests/test_log_files/log_with_capture.txt
@@ -0,0 +1 @@
+2016-05-08 07:34:05.251 MyDog123 APet4123 test.txt
diff --git a/components/core/tests/test_schema_files/multiple_capture_groups.txt b/components/core/tests/test_schema_files/multiple_capture_groups.txt
new file mode 100644
index 0000000000..09998bc19d
--- /dev/null
+++ b/components/core/tests/test_schema_files/multiple_capture_groups.txt
@@ -0,0 +1,3 @@
+delimiters: \r\n
+
+multicapture:text(?<var0>var0)text(?<var1>var1)text
diff --git a/components/core/tests/test_schema_files/search_schema.txt b/components/core/tests/test_schema_files/search_schema.txt
index 60d0c12f00..f49a6dbfa4 100644
--- a/components/core/tests/test_schema_files/search_schema.txt
+++ b/components/core/tests/test_schema_files/search_schema.txt
@@ -1,5 +1,5 @@
 // Delimiters
-delimiters: \r\n:,=!;%\?
+delimiters: \r\n:,=!;%?
 
 // First set of variables
 timestamp:[0-9]{4}\-[0-9]{2}\-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]{3}){0,1}
diff --git a/components/core/tests/test_schema_files/single_capture_group.txt b/components/core/tests/test_schema_files/single_capture_group.txt
new file mode 100644
index 0000000000..b9543fbdcc
--- /dev/null
+++ b/components/core/tests/test_schema_files/single_capture_group.txt
@@ -0,0 +1,3 @@
+delimiters: \r\n
+
+capture:[A-Za-z]+(?<group>\d+)
diff --git a/taskfiles/deps/main.yaml b/taskfiles/deps/main.yaml
index 927cedb2a1..a5a4785b26 100644
--- a/taskfiles/deps/main.yaml
+++ b/taskfiles/deps/main.yaml
@@ -367,8 +367,8 @@ tasks:
           - "-DCMAKE_INSTALL_MESSAGE=LAZY"
          - "-Dlog_surgeon_BUILD_TESTING=OFF"
         LIB_NAME: "log_surgeon"
-        TARBALL_SHA256: "4551ea50cd22e8423770fd66a167e1c86053b1f4957f72c582a2da93e7820210"
-        TARBALL_URL: "https://github.com/y-scope/log-surgeon/archive/840f262.tar.gz"
+        TARBALL_SHA256: "396ef8822e687b0bb8ca433a360e9e1d39a51620df89cac606db5e0f49d1b265"
+        TARBALL_URL: "https://github.com/y-scope/log-surgeon/archive/1135c2e.tar.gz"
 
   lz4:
     internal: true