From 6867e86fb268c2847a5052b20d32614624ffb951 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Thu, 28 Mar 2024 17:35:55 -0400 Subject: [PATCH 01/11] Remove strange timestamp pattern; Refactor TimestampPattern unit tests; Add missing unit tests for known TimestampPatterns. --- components/core/src/clp/TimestampPattern.cpp | 3 - .../core/tests/test-TimestampPattern.cpp | 910 ++++++++---------- 2 files changed, 408 insertions(+), 505 deletions(-) diff --git a/components/core/src/clp/TimestampPattern.cpp b/components/core/src/clp/TimestampPattern.cpp index c7a8bf78fb..1fad55d592 100644 --- a/components/core/src/clp/TimestampPattern.cpp +++ b/components/core/src/clp/TimestampPattern.cpp @@ -144,7 +144,6 @@ void TimestampPattern::init() { patterns.emplace_back(0, "<<<%Y-%m-%d %H:%M:%S:%3"); // E.g. 01 Jan 2016 15:50:17,085 patterns.emplace_back(0, "%d %b %Y %H:%M:%S,%3"); - // E.g. 2015-01-31T15:50:45 patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%S"); // E.g. 2015-01-31 15:50:45 @@ -157,8 +156,6 @@ void TimestampPattern::init() { patterns.emplace_back(0, "[%Y-%m-%dT%H:%M:%S"); // E.g. [20170106-16:56:41] patterns.emplace_back(0, "[%Y%m%d-%H:%M:%S]"); - // E.g. Start-Date: 2015-01-31 15:50:45 - patterns.emplace_back(1, "%Y-%m-%d %H:%M:%S"); // E.g. 15/01/31 15:50:45 patterns.emplace_back(0, "%y/%m/%d %H:%M:%S"); // E.g. 150131 9:50:45 diff --git a/components/core/tests/test-TimestampPattern.cpp b/components/core/tests/test-TimestampPattern.cpp index 0b9dc54cf4..74f77949a4 100644 --- a/components/core/tests/test-TimestampPattern.cpp +++ b/components/core/tests/test-TimestampPattern.cpp @@ -6,546 +6,452 @@ using clp::epochtime_t; using clp::TimestampPattern; using std::string; -TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { - TimestampPattern::init(); - - string line; - TimestampPattern const* pattern; - epochtime_t timestamp; - size_t timestamp_begin_pos; - size_t timestamp_end_pos; - string content; - - line = "2015-02-01T01:02:03.004 content after"; - pattern = TimestampPattern::search_known_ts_patterns( - line, +namespace { +/** + * Parses a timestamp with the given pattern from the given line and validates the result. + * @param line Line to parse timestamp from + * @param pattern Pattern of timestamp to parse + * @param expected_timestamp + * @param expected_timestamp_begin_pos + * @param expected_timestamp_end_pos + * @param expected_restored_line Expected line after restoring timestamp + */ +void parse_and_validate_timestamp_pattern( + string const& line, + TimestampPattern const& pattern, + epochtime_t expected_timestamp, + size_t expected_timestamp_begin_pos, + size_t expected_timestamp_end_pos, + string const& expected_restored_line +); + +/** + * Validates that the given line is parsed with the expected timestamp pattern. + * @param line + * @param expected_timestamp_pattern + * @param expected_timestamp + * @param expected_timestamp_begin_pos + * @param expected_timestamp_end_pos + */ +void search_and_validate_timestamp_pattern( + string const& line, + TimestampPattern const& expected_timestamp_pattern, + epochtime_t expected_timestamp, + size_t expected_timestamp_begin_pos, + size_t expected_timestamp_end_pos +); + +/** + * Validates formatting and inserting a timestamp into a line. + * @param line Original line + * @param timestamp_begin_pos + * @param timestamp_end_pos + * @param timestamp + * @param pattern + * @param expected_restored_line + */ +void validate_inserting_formatted_timestamp( + string const& line, + size_t timestamp_begin_pos, + size_t timestamp_end_pos, + epochtime_t timestamp, + TimestampPattern const& pattern, + string const& expected_restored_line +); + +/** + * Validates the result of parsing a timestamp. + * @param expected_timestamp + * @param expected_timestamp_begin_pos + * @param expected_timestamp_end_pos + * @param timestamp + * @param timestamp_begin_pos + * @param timestamp_end_pos + */ +void validate_timestamp_parsing_result( + epochtime_t expected_timestamp, + size_t expected_timestamp_begin_pos, + size_t expected_timestamp_end_pos, + epochtime_t timestamp, + size_t timestamp_begin_pos, + size_t timestamp_end_pos +); + +void parse_and_validate_timestamp_pattern( + string const& line, + TimestampPattern const& pattern, + epochtime_t expected_timestamp, + size_t expected_timestamp_begin_pos, + size_t expected_timestamp_end_pos, + string const& expected_restored_line +) { + epochtime_t timestamp{0}; + size_t timestamp_begin_pos{0}; + size_t timestamp_end_pos{0}; + pattern.parse_timestamp(line, timestamp, timestamp_begin_pos, timestamp_end_pos); + + validate_timestamp_parsing_result( + expected_timestamp, + expected_timestamp_begin_pos, + expected_timestamp_end_pos, timestamp, timestamp_begin_pos, timestamp_end_pos ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 0); - REQUIRE(pattern->get_format() == "%Y-%m-%dT%H:%M:%S.%3"); - REQUIRE(1'422'752'523'004 == timestamp); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(23 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "2015-02-01T01:02:03,004 content after"; - pattern = TimestampPattern::search_known_ts_patterns( + validate_inserting_formatted_timestamp( line, - timestamp, timestamp_begin_pos, - timestamp_end_pos - ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 0); - REQUIRE(pattern->get_format() == "%Y-%m-%dT%H:%M:%S,%3"); - REQUIRE(1'422'752'523'004 == timestamp); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(23 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "[2015-02-01T01:02:03 content after"; - pattern = TimestampPattern::search_known_ts_patterns( - line, + timestamp_end_pos, timestamp, - timestamp_begin_pos, - timestamp_end_pos + pattern, + expected_restored_line ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 0); - REQUIRE(pattern->get_format() == "[%Y-%m-%dT%H:%M:%S"); - REQUIRE(1'422'752'523'000 == timestamp); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(20 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "[20150201-01:02:03] content after"; - pattern = TimestampPattern::search_known_ts_patterns( +} + +void search_and_validate_timestamp_pattern( + string const& line, + TimestampPattern const& expected_timestamp_pattern, + epochtime_t expected_timestamp, + size_t expected_timestamp_begin_pos, + size_t expected_timestamp_end_pos +) { + epochtime_t timestamp{0}; + size_t timestamp_begin_pos{0}; + size_t timestamp_end_pos{0}; + auto* pattern = TimestampPattern::search_known_ts_patterns( line, timestamp, timestamp_begin_pos, timestamp_end_pos ); REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 0); - REQUIRE(pattern->get_format() == "[%Y%m%d-%H:%M:%S]"); - REQUIRE(1'422'752'523'000 == timestamp); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(19 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "2015-02-01 01:02:03,004 content after"; - pattern = TimestampPattern::search_known_ts_patterns( - line, + REQUIRE(pattern->get_num_spaces_before_ts() + == expected_timestamp_pattern.get_num_spaces_before_ts()); + REQUIRE(pattern->get_format() == expected_timestamp_pattern.get_format()); + + validate_timestamp_parsing_result( + expected_timestamp, + expected_timestamp_begin_pos, + expected_timestamp_end_pos, timestamp, timestamp_begin_pos, timestamp_end_pos ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 0); - REQUIRE(pattern->get_format() == "%Y-%m-%d %H:%M:%S,%3"); - REQUIRE(1'422'752'523'004 == timestamp); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(23 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "2015-02-01 01:02:03.004 content after"; - pattern = TimestampPattern::search_known_ts_patterns( + validate_inserting_formatted_timestamp( line, - timestamp, timestamp_begin_pos, - timestamp_end_pos - ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 0); - REQUIRE(pattern->get_format() == "%Y-%m-%d %H:%M:%S.%3"); - REQUIRE(1'422'752'523'004 == timestamp); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(23 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "[2015-02-01 01:02:03,004] content after"; - pattern = TimestampPattern::search_known_ts_patterns( - line, + timestamp_end_pos, timestamp, - timestamp_begin_pos, - timestamp_end_pos + *pattern, + line ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 0); - REQUIRE(pattern->get_format() == "[%Y-%m-%d %H:%M:%S,%3]"); - REQUIRE(1'422'752'523'004 == timestamp); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(25 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "2015-02-01 01:02:03 content after"; - pattern = TimestampPattern::search_known_ts_patterns( - line, - timestamp, - timestamp_begin_pos, - timestamp_end_pos +} + +void validate_inserting_formatted_timestamp( + string const& line, + size_t timestamp_begin_pos, + size_t timestamp_end_pos, + epochtime_t timestamp, + TimestampPattern const& pattern, + string const& expected_restored_line +) { + // Generate the line without the timestamp + string restored_line; + restored_line.assign(line, 0, timestamp_begin_pos); + restored_line.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); + + pattern.insert_formatted_timestamp(timestamp, restored_line); + + REQUIRE(expected_restored_line == restored_line); +} + +void validate_timestamp_parsing_result( + epochtime_t expected_timestamp, + size_t expected_timestamp_begin_pos, + size_t expected_timestamp_end_pos, + epochtime_t timestamp, + size_t timestamp_begin_pos, + size_t timestamp_end_pos +) { + REQUIRE(expected_timestamp == timestamp); + REQUIRE(expected_timestamp_begin_pos == timestamp_begin_pos); + REQUIRE(expected_timestamp_end_pos == timestamp_end_pos); +} +} // namespace + +TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { + TimestampPattern::init(); + + search_and_validate_timestamp_pattern( + "2015-02-01T01:02:03.004 content after", + {0, "%Y-%m-%dT%H:%M:%S.%3"}, + 1'422'752'523'004, + 0, + 23 ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 0); - REQUIRE(pattern->get_format() == "%Y-%m-%d %H:%M:%S"); - REQUIRE(1'422'752'523'000 == timestamp); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(19 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "2015/02/01 01:02:03 content after"; - pattern = TimestampPattern::search_known_ts_patterns( - line, - timestamp, - timestamp_begin_pos, - timestamp_end_pos + + search_and_validate_timestamp_pattern( + "2015-02-01T01:02:03,004 content after", + {0, "%Y-%m-%dT%H:%M:%S,%3"}, + 1'422'752'523'004, + 0, + 23 ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 0); - REQUIRE(pattern->get_format() == "%Y/%m/%d %H:%M:%S"); - REQUIRE(1'422'752'523'000 == timestamp); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(19 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "15/02/01 01:02:03 content after"; - pattern = TimestampPattern::search_known_ts_patterns( - line, - timestamp, - timestamp_begin_pos, - timestamp_end_pos + + search_and_validate_timestamp_pattern( + "2015-02-01 01:02:03.004 content after", + {0, "%Y-%m-%d %H:%M:%S.%3"}, + 1'422'752'523'004, + 0, + 23 ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 0); - REQUIRE(pattern->get_format() == "%y/%m/%d %H:%M:%S"); - REQUIRE(1'422'752'523'000 == timestamp); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(17 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "150201 1:02:03 content after"; - pattern = TimestampPattern::search_known_ts_patterns( - line, - timestamp, - timestamp_begin_pos, - timestamp_end_pos + + search_and_validate_timestamp_pattern( + "2015-02-01 01:02:03,004 content after", + {0, "%Y-%m-%d %H:%M:%S,%3"}, + 1'422'752'523'004, + 0, + 23 ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 0); - REQUIRE(pattern->get_format() == "%y%m%d %k:%M:%S"); - REQUIRE(1'422'752'523'000 == timestamp); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(15 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "01 Feb 2015 01:02:03,004 content after"; - pattern = TimestampPattern::search_known_ts_patterns( - line, - timestamp, - timestamp_begin_pos, - timestamp_end_pos + + search_and_validate_timestamp_pattern( + "2015/01/31T15:50:45.123 content after", + {0, "%Y/%m/%dT%H:%M:%S.%3"}, + 1'422'719'445'123, + 0, + 23 ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 0); - REQUIRE(pattern->get_format() == "%d %b %Y %H:%M:%S,%3"); - REQUIRE(1'422'752'523'004 == timestamp); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(24 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "Feb 01, 2015 1:02:03 AM content after"; - pattern = TimestampPattern::search_known_ts_patterns( - line, - timestamp, - timestamp_begin_pos, - timestamp_end_pos + + search_and_validate_timestamp_pattern( + "2015/01/31T15:50:45,123 content after", + {0, "%Y/%m/%dT%H:%M:%S,%3"}, + 1'422'719'445'123, + 0, + 23 ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 0); - REQUIRE(pattern->get_format() == "%b %d, %Y %l:%M:%S %p"); - REQUIRE(1'422'752'523'000 == timestamp); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(24 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "February 01, 2015 01:02 content after"; - pattern = TimestampPattern::search_known_ts_patterns( - line, - timestamp, - timestamp_begin_pos, - timestamp_end_pos + + search_and_validate_timestamp_pattern( + "2015/01/31 15:50:45.123 content after", + {0, "%Y/%m/%d %H:%M:%S.%3"}, + 1'422'719'445'123, + 0, + 23 ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 0); - REQUIRE(pattern->get_format() == "%B %d, %Y %H:%M"); - REQUIRE(1'422'752'520'000 == timestamp); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(23 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "E [01/Feb/2015:01:02:03 content after"; - pattern = TimestampPattern::search_known_ts_patterns( - line, - timestamp, - timestamp_begin_pos, - timestamp_end_pos + + search_and_validate_timestamp_pattern( + "2015/01/31 15:50:45,123 content after", + {0, "%Y/%m/%d %H:%M:%S,%3"}, + 1'422'719'445'123, + 0, + 23 ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 1); - REQUIRE(pattern->get_format() == "[%d/%b/%Y:%H:%M:%S"); - REQUIRE(1'422'752'523'000 == timestamp); - REQUIRE(2 == timestamp_begin_pos); - REQUIRE(23 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "localhost - - [01/Feb/2015:01:02:03 content after"; - pattern = TimestampPattern::search_known_ts_patterns( - line, - timestamp, - timestamp_begin_pos, - timestamp_end_pos + + search_and_validate_timestamp_pattern( + "[2015-02-01 01:02:03,004] content after", + {0, "[%Y-%m-%d %H:%M:%S,%3]"}, + 1'422'752'523'004, + 0, + 25 ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 3); - REQUIRE(pattern->get_format() == "[%d/%b/%Y:%H:%M:%S"); - REQUIRE(1'422'752'523'000 == timestamp); - REQUIRE(14 == timestamp_begin_pos); - REQUIRE(35 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "INFO [main] 2015-02-01 01:02:03,004 content after"; - pattern = TimestampPattern::search_known_ts_patterns( - line, - timestamp, - timestamp_begin_pos, - timestamp_end_pos + + search_and_validate_timestamp_pattern( + "INFO [main] 2015-02-01 01:02:03,004 content after", + {2, "%Y-%m-%d %H:%M:%S,%3"}, + 1'422'752'523'004, + 12, + 35 ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 2); - REQUIRE(pattern->get_format() == "%Y-%m-%d %H:%M:%S,%3"); - REQUIRE(1'422'752'523'004 == timestamp); - REQUIRE(12 == timestamp_begin_pos); - REQUIRE(35 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "Started POST \"/api/v3/internal/allowed\" for 127.0.0.1 at 2015-02-01 01:02:03 content " - "after"; - pattern = TimestampPattern::search_known_ts_patterns( - line, - timestamp, - timestamp_begin_pos, - timestamp_end_pos + + search_and_validate_timestamp_pattern( + "<<<2015-02-01 01:02:03:004 content after", + {0, "<<<%Y-%m-%d %H:%M:%S:%3"}, + 1'422'752'523'004, + 0, + 26 ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 6); - REQUIRE(pattern->get_format() == "%Y-%m-%d %H:%M:%S"); - REQUIRE(1'422'752'523'000 == timestamp); - REQUIRE(57 == timestamp_begin_pos); - REQUIRE(76 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "update-alternatives 2015-02-01 01:02:03 content after"; - pattern = TimestampPattern::search_known_ts_patterns( - line, - timestamp, - timestamp_begin_pos, - timestamp_end_pos + + search_and_validate_timestamp_pattern( + "01 Feb 2015 01:02:03,004 content after", + {0, "%d %b %Y %H:%M:%S,%3"}, + 1'422'752'523'004, + 0, + 24 ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 1); - REQUIRE(pattern->get_format() == "%Y-%m-%d %H:%M:%S"); - REQUIRE(1'422'752'523'000 == timestamp); - REQUIRE(20 == timestamp_begin_pos); - REQUIRE(39 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "ERROR: apport (pid 4557) Sun Feb 1 01:02:03 2015 content after"; - pattern = TimestampPattern::search_known_ts_patterns( - line, - timestamp, - timestamp_begin_pos, - timestamp_end_pos + + search_and_validate_timestamp_pattern( + "2015-01-31T15:50:45 content after", + {0, "%Y-%m-%dT%H:%M:%S"}, + 1'422'719'445'000, + 0, + 19 ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 4); - REQUIRE(pattern->get_format() == "%a %b %e %H:%M:%S %Y"); - REQUIRE(1'422'752'523'000 == timestamp); - REQUIRE(25 == timestamp_begin_pos); - REQUIRE(49 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "<<<2015-02-01 01:02:03:004 content after"; - pattern = TimestampPattern::search_known_ts_patterns( - line, - timestamp, - timestamp_begin_pos, - timestamp_end_pos + + search_and_validate_timestamp_pattern( + "2015-02-01 01:02:03 content after", + {0, "%Y-%m-%d %H:%M:%S"}, + 1'422'752'523'000, + 0, + 19 ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 0); - REQUIRE(pattern->get_format() == "<<<%Y-%m-%d %H:%M:%S:%3"); - REQUIRE(1'422'752'523'004 == timestamp); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(26 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "Jan 21 11:56:42"; - pattern = TimestampPattern::search_known_ts_patterns( - line, - timestamp, - timestamp_begin_pos, - timestamp_end_pos + + search_and_validate_timestamp_pattern( + "2015/01/31T15:50:45 content after", + {0, "%Y/%m/%dT%H:%M:%S"}, + 1'422'719'445'000, + 0, + 19 ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 0); - REQUIRE(pattern->get_format() == "%b %d %H:%M:%S"); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(15 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "01-21 11:56:42.392"; - pattern = TimestampPattern::search_known_ts_patterns( - line, - timestamp, - timestamp_begin_pos, - timestamp_end_pos + + search_and_validate_timestamp_pattern( + "2015/02/01 01:02:03 content after", + {0, "%Y/%m/%d %H:%M:%S"}, + 1'422'752'523'000, + 0, + 19 ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 0); - REQUIRE(pattern->get_format() == "%m-%d %H:%M:%S.%3"); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(18 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "626515123 content after"; - pattern = TimestampPattern::search_known_ts_patterns( - line, - timestamp, - timestamp_begin_pos, - timestamp_end_pos + + search_and_validate_timestamp_pattern( + "[2015-02-01T01:02:03 content after", + {0, "[%Y-%m-%dT%H:%M:%S"}, + 1'422'752'523'000, + 0, + 20 ); - REQUIRE(nullptr != pattern); - REQUIRE(pattern->get_num_spaces_before_ts() == 0); - REQUIRE(pattern->get_format() == "%#3"); - REQUIRE(626'515'123 == timestamp); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(9 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern->insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - // The patterns below overlap with the known timestamp patterns, so we can only test them by - // specifying them manually - // NOTE: Since the timestamp's stored by CLP are in milliseconds right now, microsecond and + + search_and_validate_timestamp_pattern( + "[20150201-01:02:03] content after", + {0, "[%Y%m%d-%H:%M:%S]"}, + 1'422'752'523'000, + 0, + 19 + ); + + search_and_validate_timestamp_pattern( + "15/02/01 01:02:03 content after", + {0, "%y/%m/%d %H:%M:%S"}, + 1'422'752'523'000, + 0, + 17 + ); + + search_and_validate_timestamp_pattern( + "150201 1:02:03 content after", + {0, "%y%m%d %k:%M:%S"}, + 1'422'752'523'000, + 0, + 15 + ); + + search_and_validate_timestamp_pattern( + "Feb 01, 2015 1:02:03 AM content after", + {0, "%b %d, %Y %l:%M:%S %p"}, + 1'422'752'523'000, + 0, + 24 + ); + + search_and_validate_timestamp_pattern( + "February 01, 2015 01:02 content after", + {0, "%B %d, %Y %H:%M"}, + 1'422'752'520'000, + 0, + 23 + ); + + search_and_validate_timestamp_pattern( + "E [01/Feb/2015:01:02:03 content after", + {1, "[%d/%b/%Y:%H:%M:%S"}, + 1'422'752'523'000, + 2, + 23 + ); + + search_and_validate_timestamp_pattern( + "localhost - - [01/Feb/2015:01:02:03 content after", + {3, "[%d/%b/%Y:%H:%M:%S"}, + 1'422'752'523'000, + 14, + 35 + ); + + search_and_validate_timestamp_pattern( + "localhost - - [01/02/2015:01:02:03 content after", + {3, "[%d/%m/%Y:%H:%M:%S"}, + 1'422'752'523'000, + 14, + 34 + ); + + search_and_validate_timestamp_pattern( + "Started POST \"/api/v3/internal/allowed\" for 127.0.0.1 at 2015-02-01 01:02:03 " + "content after", + {6, "%Y-%m-%d %H:%M:%S"}, + 1'422'752'523'000, + 57, + 76 + ); + + search_and_validate_timestamp_pattern( + "update-alternatives 2015-02-01 01:02:03 content after", + {1, "%Y-%m-%d %H:%M:%S"}, + 1'422'752'523'000, + 20, + 39 + ); + + search_and_validate_timestamp_pattern( + "ERROR: apport (pid 4557) Sun Feb 1 01:02:03 2015 content after", + {4, "%a %b %e %H:%M:%S %Y"}, + 1'422'752'523'000, + 25, + 49 + ); + + search_and_validate_timestamp_pattern( + "Sun Feb 1 01:02:03 2015 content after", + {0, "%a %b %e %H:%M:%S %Y"}, + 1'422'752'523'000, + 0, + 24 + ); + + search_and_validate_timestamp_pattern( + "Jan 21 11:56:42", + {0, "%b %d %H:%M:%S"}, + 1'771'002'000, + 0, + 15 + ); + + search_and_validate_timestamp_pattern( + "01-21 11:56:42.392", + {0, "%m-%d %H:%M:%S.%3"}, + 1'771'002'392, + 0, + 18 + ); + + search_and_validate_timestamp_pattern("626515123 content after", {0, "%#3"}, 626'515'123, 0, 9); + + // Inputs for the patterns below get recognized as other timestamp patterns, so we can only test + // the patterns by specifying them manually. + // NOTE: Since CLP currently stores timestamps with millisecond resolution, microsecond and // nanosecond-precision timestamps get truncated. - line = "626515123 content after"; - auto specific_pattern = TimestampPattern{0, "%#6"}; - specific_pattern.parse_timestamp(line, timestamp, timestamp_begin_pos, timestamp_end_pos); - REQUIRE(specific_pattern.get_num_spaces_before_ts() == 0); - REQUIRE(specific_pattern.get_format() == "%#6"); - REQUIRE(626'515 == timestamp); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(9 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - specific_pattern.insert_formatted_timestamp(timestamp, content); - REQUIRE("626515000 content after" == content); - - line = "626515123 content after"; - specific_pattern = TimestampPattern{0, "%#9"}; - specific_pattern.parse_timestamp(line, timestamp, timestamp_begin_pos, timestamp_end_pos); - REQUIRE(specific_pattern.get_num_spaces_before_ts() == 0); - REQUIRE(specific_pattern.get_format() == "%#9"); - REQUIRE(626 == timestamp); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(9 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - specific_pattern.insert_formatted_timestamp(timestamp, content); - REQUIRE("626000000 content after" == content); - - line = "2015/01/31 15:50:45.123 content after"; - specific_pattern = TimestampPattern{0, "%Y/%m/%d %H:%M:%S.%3"}; - specific_pattern.parse_timestamp(line, timestamp, timestamp_begin_pos, timestamp_end_pos); - REQUIRE(specific_pattern.get_num_spaces_before_ts() == 0); - REQUIRE(specific_pattern.get_format() == "%Y/%m/%d %H:%M:%S.%3"); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(23 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - specific_pattern.insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "2015/01/31 15:50:45,123 content after"; - specific_pattern = TimestampPattern{0, "%Y/%m/%d %H:%M:%S,%3"}; - specific_pattern.parse_timestamp(line, timestamp, timestamp_begin_pos, timestamp_end_pos); - REQUIRE(specific_pattern.get_num_spaces_before_ts() == 0); - REQUIRE(specific_pattern.get_format() == "%Y/%m/%d %H:%M:%S,%3"); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(23 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - specific_pattern.insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "2015/01/31T15:50:45 content after"; - specific_pattern = TimestampPattern{0, "%Y/%m/%dT%H:%M:%S"}; - specific_pattern.parse_timestamp(line, timestamp, timestamp_begin_pos, timestamp_end_pos); - REQUIRE(specific_pattern.get_num_spaces_before_ts() == 0); - REQUIRE(specific_pattern.get_format() == "%Y/%m/%dT%H:%M:%S"); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(19 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - specific_pattern.insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "2015/01/31T15:50:45.123 content after"; - specific_pattern = TimestampPattern{0, "%Y/%m/%dT%H:%M:%S.%3"}; - specific_pattern.parse_timestamp(line, timestamp, timestamp_begin_pos, timestamp_end_pos); - REQUIRE(specific_pattern.get_num_spaces_before_ts() == 0); - REQUIRE(specific_pattern.get_format() == "%Y/%m/%dT%H:%M:%S.%3"); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(23 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - specific_pattern.insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "2015/01/31T15:50:45,123 content after"; - specific_pattern = TimestampPattern{0, "%Y/%m/%dT%H:%M:%S,%3"}; - specific_pattern.parse_timestamp(line, timestamp, timestamp_begin_pos, timestamp_end_pos); - REQUIRE(specific_pattern.get_num_spaces_before_ts() == 0); - REQUIRE(specific_pattern.get_format() == "%Y/%m/%dT%H:%M:%S,%3"); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(23 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - specific_pattern.insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); - - line = "2015-01-31T15:50:45 content after"; - specific_pattern = TimestampPattern{0, "%Y-%m-%dT%H:%M:%S"}; - specific_pattern.parse_timestamp(line, timestamp, timestamp_begin_pos, timestamp_end_pos); - REQUIRE(specific_pattern.get_num_spaces_before_ts() == 0); - REQUIRE(specific_pattern.get_format() == "%Y-%m-%dT%H:%M:%S"); - REQUIRE(0 == timestamp_begin_pos); - REQUIRE(19 == timestamp_end_pos); - content.assign(line, 0, timestamp_begin_pos); - content.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - specific_pattern.insert_formatted_timestamp(timestamp, content); - REQUIRE(line == content); + parse_and_validate_timestamp_pattern( + "626515123 content after", + {0, "%#6"}, + 626'515, + 0, + 9, + "626515000 content after" + ); + + parse_and_validate_timestamp_pattern( + "626515123 content after", + {0, "%#9"}, + 626, + 0, + 9, + "626000000 content after" + ); } From 8709aeec5ddbce29c901cbb7bef51409eadc3217 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Thu, 28 Mar 2024 17:49:33 -0400 Subject: [PATCH 02/11] Add UtcOffset to timestamp parsing methods without parsing it and without storing it. --- components/core/src/clp/Defs.h | 3 + components/core/src/clp/MessageParser.cpp | 12 +++- components/core/src/clp/ParsedMessage.cpp | 15 +++- components/core/src/clp/ParsedMessage.hpp | 14 +++- components/core/src/clp/TimestampPattern.cpp | 18 ++++- components/core/src/clp/TimestampPattern.hpp | 8 ++- .../clp/streaming_archive/reader/Archive.cpp | 1 + .../clp/streaming_archive/writer/Archive.cpp | 2 + .../core/tests/test-TimestampPattern.cpp | 72 ++++++++++++++++++- 9 files changed, 133 insertions(+), 12 deletions(-) diff --git a/components/core/src/clp/Defs.h b/components/core/src/clp/Defs.h index f4a96ef9b4..5221a12596 100644 --- a/components/core/src/clp/Defs.h +++ b/components/core/src/clp/Defs.h @@ -2,6 +2,7 @@ #define CLP_DEFS_H #include +#include #include #include @@ -10,6 +11,8 @@ namespace clp { typedef int64_t epochtime_t; constexpr epochtime_t cEpochTimeMin = std::numeric_limits::min(); constexpr epochtime_t cEpochTimeMax = std::numeric_limits::max(); +// We use seconds resolution to support GPS clock offsets +using UtcOffset = std::chrono::seconds; typedef uint64_t variable_dictionary_id_t; constexpr variable_dictionary_id_t cVariableDictionaryIdMax diff --git a/components/core/src/clp/MessageParser.cpp b/components/core/src/clp/MessageParser.cpp index 666b7095a0..baa3830b87 100644 --- a/components/core/src/clp/MessageParser.cpp +++ b/components/core/src/clp/MessageParser.cpp @@ -1,5 +1,7 @@ #include "MessageParser.hpp" +#include + #include "Defs.h" #include "TimestampPattern.hpp" @@ -13,7 +15,7 @@ bool MessageParser::parse_next_message( size_t& buf_pos, ParsedMessage& message ) { - message.clear_except_ts_patt(); + message.clear_except_time_pattern_and_offset(); while (true) { // Check if the buffer was exhausted @@ -50,7 +52,7 @@ bool MessageParser::parse_next_message( ReaderInterface& reader, ParsedMessage& message ) { - message.clear_except_ts_patt(); + message.clear_except_time_pattern_and_offset(); while (true) { // Read message @@ -98,6 +100,7 @@ bool MessageParser::parse_line(ParsedMessage& message) { // Parse timestamp and content TimestampPattern const* timestamp_pattern = message.get_ts_patt(); epochtime_t timestamp = 0; + UtcOffset utc_offset{0}; size_t timestamp_begin_pos; size_t timestamp_end_pos; if (nullptr == timestamp_pattern @@ -105,6 +108,7 @@ bool MessageParser::parse_line(ParsedMessage& message) { == timestamp_pattern->parse_timestamp( m_line, timestamp, + utc_offset, timestamp_begin_pos, timestamp_end_pos )) @@ -112,6 +116,7 @@ bool MessageParser::parse_line(ParsedMessage& message) { timestamp_pattern = TimestampPattern::search_known_ts_patterns( m_line, timestamp, + utc_offset, timestamp_begin_pos, timestamp_end_pos ); @@ -124,6 +129,7 @@ bool MessageParser::parse_line(ParsedMessage& message) { m_buffered_msg.set( timestamp_pattern, timestamp, + utc_offset, m_line, timestamp_begin_pos, timestamp_end_pos @@ -136,6 +142,7 @@ bool MessageParser::parse_line(ParsedMessage& message) { m_buffered_msg.set( timestamp_pattern, timestamp, + utc_offset, m_line, timestamp_begin_pos, timestamp_end_pos @@ -149,6 +156,7 @@ bool MessageParser::parse_line(ParsedMessage& message) { message.set( timestamp_pattern, timestamp, + utc_offset, m_line, timestamp_begin_pos, timestamp_end_pos diff --git a/components/core/src/clp/ParsedMessage.cpp b/components/core/src/clp/ParsedMessage.cpp index e42ecd2a99..b373ff013f 100644 --- a/components/core/src/clp/ParsedMessage.cpp +++ b/components/core/src/clp/ParsedMessage.cpp @@ -5,12 +5,14 @@ using std::string; namespace clp { void ParsedMessage::clear() { m_ts_patt = nullptr; - clear_except_ts_patt(); + m_utc_offset = UtcOffset{0}; + clear_except_time_pattern_and_offset(); } -void ParsedMessage::clear_except_ts_patt() { +void ParsedMessage::clear_except_time_pattern_and_offset() { m_ts_patt_changed = false; m_ts = 0; + m_utc_offset_changed = false; m_content.clear(); m_orig_num_bytes = 0; m_is_set = false; @@ -19,6 +21,7 @@ void ParsedMessage::clear_except_ts_patt() { void ParsedMessage::set( TimestampPattern const* timestamp_pattern, epochtime_t const timestamp, + UtcOffset utc_offset, string const& line, size_t timestamp_begin_pos, size_t timestamp_end_pos @@ -28,6 +31,10 @@ void ParsedMessage::set( m_ts_patt_changed = true; } m_ts = timestamp; + if (utc_offset != m_utc_offset) { + m_utc_offset = utc_offset; + m_utc_offset_changed = true; + } if (timestamp_begin_pos == timestamp_end_pos) { m_content.assign(line); } else { @@ -49,6 +56,10 @@ void ParsedMessage::consume(ParsedMessage& message) { m_ts_patt_changed = true; } m_ts = message.m_ts; + if (message.m_utc_offset != m_utc_offset) { + m_utc_offset = message.m_utc_offset; + m_utc_offset_changed = true; + } m_content.swap(message.m_content); m_orig_num_bytes = message.m_orig_num_bytes; m_is_set = true; diff --git a/components/core/src/clp/ParsedMessage.hpp b/components/core/src/clp/ParsedMessage.hpp index 7ba5d42a56..d6a26b394f 100644 --- a/components/core/src/clp/ParsedMessage.hpp +++ b/components/core/src/clp/ParsedMessage.hpp @@ -17,6 +17,8 @@ class ParsedMessage { : m_ts_patt(nullptr), m_ts_patt_changed(false), m_ts(0), + m_utc_offset{}, + m_utc_offset_changed(false), m_content({}), m_orig_num_bytes(0), m_is_set(false) {} @@ -30,11 +32,15 @@ class ParsedMessage { // Methods void clear(); - void clear_except_ts_patt(); + /** + * Clears the parsed message except for the timestamp pattern and UTC offset. + */ + void clear_except_time_pattern_and_offset(); void set( TimestampPattern const* timestamp_pattern, epochtime_t timestamp, + UtcOffset utc_offset, std::string const& line, size_t timestamp_begin_pos, size_t timestamp_end_pos @@ -58,6 +64,10 @@ class ParsedMessage { bool has_ts_patt_changed() const { return m_ts_patt_changed; } + UtcOffset get_utc_offset() const { return m_utc_offset; } + + bool has_utc_offset_changed() const { return m_utc_offset_changed; } + bool is_empty() const { return false == m_is_set; } private: @@ -65,6 +75,8 @@ class ParsedMessage { TimestampPattern const* m_ts_patt; bool m_ts_patt_changed; epochtime_t m_ts; + UtcOffset m_utc_offset; + bool m_utc_offset_changed; std::string m_content; size_t m_orig_num_bytes; bool m_is_set; diff --git a/components/core/src/clp/TimestampPattern.cpp b/components/core/src/clp/TimestampPattern.cpp index 1fad55d592..9d31b72cfa 100644 --- a/components/core/src/clp/TimestampPattern.cpp +++ b/components/core/src/clp/TimestampPattern.cpp @@ -199,12 +199,18 @@ void TimestampPattern::init() { TimestampPattern const* TimestampPattern::search_known_ts_patterns( string const& line, epochtime_t& timestamp, + UtcOffset& utc_offset, size_t& timestamp_begin_pos, size_t& timestamp_end_pos ) { for (size_t i = 0; i < m_known_ts_patterns_len; ++i) { - if (m_known_ts_patterns[i] - .parse_timestamp(line, timestamp, timestamp_begin_pos, timestamp_end_pos)) + if (m_known_ts_patterns[i].parse_timestamp( + line, + timestamp, + utc_offset, + timestamp_begin_pos, + timestamp_end_pos + )) { return &m_known_ts_patterns[i]; } @@ -235,6 +241,7 @@ void TimestampPattern::clear() { bool TimestampPattern::parse_timestamp( string const& line, epochtime_t& timestamp, + UtcOffset& utc_offset, size_t& timestamp_begin_pos, size_t& timestamp_end_pos ) const { @@ -738,6 +745,7 @@ bool TimestampPattern::parse_timestamp( auto duration_since_epoch = timestamp_point - unix_epoch_point; // Convert to raw milliseconds timestamp = std::chrono::duration_cast(duration_since_epoch).count(); + utc_offset = std::chrono::seconds{0}; timestamp_begin_pos = ts_begin_ix; timestamp_end_pos = line_ix; @@ -745,7 +753,11 @@ bool TimestampPattern::parse_timestamp( return true; } -void TimestampPattern::insert_formatted_timestamp(epochtime_t const timestamp, string& msg) const { +void TimestampPattern::insert_formatted_timestamp( + epochtime_t const timestamp, + UtcOffset utc_offset, + string& msg +) const { size_t msg_length = msg.length(); string new_msg; diff --git a/components/core/src/clp/TimestampPattern.hpp b/components/core/src/clp/TimestampPattern.hpp index a1be80757b..14b02db25d 100644 --- a/components/core/src/clp/TimestampPattern.hpp +++ b/components/core/src/clp/TimestampPattern.hpp @@ -77,6 +77,7 @@ class TimestampPattern { * if found, parses the timestamp * @param line * @param timestamp Parsed timestamp + * @param utc_offset * @param timestamp_begin_pos * @param timestamp_end_pos * @return pointer to the timestamp pattern if found, nullptr otherwise @@ -84,6 +85,7 @@ class TimestampPattern { static TimestampPattern const* search_known_ts_patterns( std::string const& line, epochtime_t& timestamp, + UtcOffset& utc_offset, size_t& timestamp_begin_pos, size_t& timestamp_end_pos ); @@ -113,6 +115,7 @@ class TimestampPattern { * Tries to parse the timestamp from the given line * @param line * @param timestamp Parsed timestamp + * @param utc_offset * @param timestamp_begin_pos * @param timestamp_end_pos * @return true if parsed successfully, false otherwise @@ -120,17 +123,20 @@ class TimestampPattern { bool parse_timestamp( std::string const& line, epochtime_t& timestamp, + UtcOffset& utc_offset, size_t& timestamp_begin_pos, size_t& timestamp_end_pos ) const; /** * Inserts the timestamp into the given message using this pattern * @param timestamp + * @param utc_offset * @param msg * @throw TimestampPattern::OperationFailed if the the pattern contains unsupported format * specifiers or the message cannot fit the timestamp pattern */ - void insert_formatted_timestamp(epochtime_t timestamp, std::string& msg) const; + void + insert_formatted_timestamp(epochtime_t timestamp, UtcOffset utc_offset, std::string& msg) const; /** * Compares two timestamp patterns for equality diff --git a/components/core/src/clp/streaming_archive/reader/Archive.cpp b/components/core/src/clp/streaming_archive/reader/Archive.cpp index a836a37857..f7770edc0b 100644 --- a/components/core/src/clp/streaming_archive/reader/Archive.cpp +++ b/components/core/src/clp/streaming_archive/reader/Archive.cpp @@ -209,6 +209,7 @@ bool Archive::decompress_message( } timestamp_patterns[file.get_current_ts_pattern_ix()].second.insert_formatted_timestamp( compressed_msg.get_ts_in_milli(), + UtcOffset{0}, decompressed_msg ); } diff --git a/components/core/src/clp/streaming_archive/writer/Archive.cpp b/components/core/src/clp/streaming_archive/writer/Archive.cpp index a66193cb1b..f600fd5b2a 100644 --- a/components/core/src/clp/streaming_archive/writer/Archive.cpp +++ b/components/core/src/clp/streaming_archive/writer/Archive.cpp @@ -313,6 +313,7 @@ void Archive::write_msg( void Archive::write_msg_using_schema(LogEventView const& log_view) { epochtime_t timestamp = 0; + UtcOffset utc_offset{0}; TimestampPattern* timestamp_pattern = nullptr; auto const& log_output_buffer = log_view.get_log_output_buffer(); if (log_output_buffer->has_timestamp()) { @@ -321,6 +322,7 @@ void Archive::write_msg_using_schema(LogEventView const& log_view) { timestamp_pattern = (TimestampPattern*)TimestampPattern::search_known_ts_patterns( log_output_buffer->get_mutable_token(0).to_string(), timestamp, + utc_offset, start, end ); diff --git a/components/core/tests/test-TimestampPattern.cpp b/components/core/tests/test-TimestampPattern.cpp index 74f77949a4..90a9c22696 100644 --- a/components/core/tests/test-TimestampPattern.cpp +++ b/components/core/tests/test-TimestampPattern.cpp @@ -4,6 +4,7 @@ using clp::epochtime_t; using clp::TimestampPattern; +using clp::UtcOffset; using std::string; namespace { @@ -12,6 +13,7 @@ namespace { * @param line Line to parse timestamp from * @param pattern Pattern of timestamp to parse * @param expected_timestamp + * @param expected_utc_offset * @param expected_timestamp_begin_pos * @param expected_timestamp_end_pos * @param expected_restored_line Expected line after restoring timestamp @@ -20,6 +22,7 @@ void parse_and_validate_timestamp_pattern( string const& line, TimestampPattern const& pattern, epochtime_t expected_timestamp, + UtcOffset expected_utc_offset, size_t expected_timestamp_begin_pos, size_t expected_timestamp_end_pos, string const& expected_restored_line @@ -30,6 +33,7 @@ void parse_and_validate_timestamp_pattern( * @param line * @param expected_timestamp_pattern * @param expected_timestamp + * @param expected_utc_offset * @param expected_timestamp_begin_pos * @param expected_timestamp_end_pos */ @@ -37,6 +41,7 @@ void search_and_validate_timestamp_pattern( string const& line, TimestampPattern const& expected_timestamp_pattern, epochtime_t expected_timestamp, + UtcOffset expected_utc_offset, size_t expected_timestamp_begin_pos, size_t expected_timestamp_end_pos ); @@ -47,6 +52,7 @@ void search_and_validate_timestamp_pattern( * @param timestamp_begin_pos * @param timestamp_end_pos * @param timestamp + * @param utc_offset * @param pattern * @param expected_restored_line */ @@ -55,6 +61,7 @@ void validate_inserting_formatted_timestamp( size_t timestamp_begin_pos, size_t timestamp_end_pos, epochtime_t timestamp, + UtcOffset utc_offset, TimestampPattern const& pattern, string const& expected_restored_line ); @@ -62,17 +69,21 @@ void validate_inserting_formatted_timestamp( /** * Validates the result of parsing a timestamp. * @param expected_timestamp + * @param expected_utc_offset * @param expected_timestamp_begin_pos * @param expected_timestamp_end_pos * @param timestamp + * @param utc_offset * @param timestamp_begin_pos * @param timestamp_end_pos */ void validate_timestamp_parsing_result( epochtime_t expected_timestamp, + UtcOffset expected_utc_offset, size_t expected_timestamp_begin_pos, size_t expected_timestamp_end_pos, epochtime_t timestamp, + UtcOffset utc_offset, size_t timestamp_begin_pos, size_t timestamp_end_pos ); @@ -81,20 +92,24 @@ void parse_and_validate_timestamp_pattern( string const& line, TimestampPattern const& pattern, epochtime_t expected_timestamp, + UtcOffset expected_utc_offset, size_t expected_timestamp_begin_pos, size_t expected_timestamp_end_pos, string const& expected_restored_line ) { epochtime_t timestamp{0}; + UtcOffset utc_offset{0}; size_t timestamp_begin_pos{0}; size_t timestamp_end_pos{0}; - pattern.parse_timestamp(line, timestamp, timestamp_begin_pos, timestamp_end_pos); + pattern.parse_timestamp(line, timestamp, utc_offset, timestamp_begin_pos, timestamp_end_pos); validate_timestamp_parsing_result( expected_timestamp, + expected_utc_offset, expected_timestamp_begin_pos, expected_timestamp_end_pos, timestamp, + utc_offset, timestamp_begin_pos, timestamp_end_pos ); @@ -103,6 +118,7 @@ void parse_and_validate_timestamp_pattern( timestamp_begin_pos, timestamp_end_pos, timestamp, + utc_offset, pattern, expected_restored_line ); @@ -112,15 +128,18 @@ void search_and_validate_timestamp_pattern( string const& line, TimestampPattern const& expected_timestamp_pattern, epochtime_t expected_timestamp, + UtcOffset expected_utc_offset, size_t expected_timestamp_begin_pos, size_t expected_timestamp_end_pos ) { epochtime_t timestamp{0}; + UtcOffset utc_offset{0}; size_t timestamp_begin_pos{0}; size_t timestamp_end_pos{0}; auto* pattern = TimestampPattern::search_known_ts_patterns( line, timestamp, + utc_offset, timestamp_begin_pos, timestamp_end_pos ); @@ -131,9 +150,11 @@ void search_and_validate_timestamp_pattern( validate_timestamp_parsing_result( expected_timestamp, + expected_utc_offset, expected_timestamp_begin_pos, expected_timestamp_end_pos, timestamp, + utc_offset, timestamp_begin_pos, timestamp_end_pos ); @@ -142,6 +163,7 @@ void search_and_validate_timestamp_pattern( timestamp_begin_pos, timestamp_end_pos, timestamp, + utc_offset, *pattern, line ); @@ -152,6 +174,7 @@ void validate_inserting_formatted_timestamp( size_t timestamp_begin_pos, size_t timestamp_end_pos, epochtime_t timestamp, + UtcOffset utc_offset, TimestampPattern const& pattern, string const& expected_restored_line ) { @@ -160,20 +183,23 @@ void validate_inserting_formatted_timestamp( restored_line.assign(line, 0, timestamp_begin_pos); restored_line.append(line, timestamp_end_pos, line.length() - timestamp_end_pos); - pattern.insert_formatted_timestamp(timestamp, restored_line); + pattern.insert_formatted_timestamp(timestamp, utc_offset, restored_line); REQUIRE(expected_restored_line == restored_line); } void validate_timestamp_parsing_result( epochtime_t expected_timestamp, + UtcOffset expected_utc_offset, size_t expected_timestamp_begin_pos, size_t expected_timestamp_end_pos, epochtime_t timestamp, + UtcOffset utc_offset, size_t timestamp_begin_pos, size_t timestamp_end_pos ) { REQUIRE(expected_timestamp == timestamp); + REQUIRE(expected_utc_offset == utc_offset); REQUIRE(expected_timestamp_begin_pos == timestamp_begin_pos); REQUIRE(expected_timestamp_end_pos == timestamp_end_pos); } @@ -186,6 +212,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "2015-02-01T01:02:03.004 content after", {0, "%Y-%m-%dT%H:%M:%S.%3"}, 1'422'752'523'004, + UtcOffset{0}, 0, 23 ); @@ -194,6 +221,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "2015-02-01T01:02:03,004 content after", {0, "%Y-%m-%dT%H:%M:%S,%3"}, 1'422'752'523'004, + UtcOffset{0}, 0, 23 ); @@ -202,6 +230,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "2015-02-01 01:02:03.004 content after", {0, "%Y-%m-%d %H:%M:%S.%3"}, 1'422'752'523'004, + UtcOffset{0}, 0, 23 ); @@ -210,6 +239,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "2015-02-01 01:02:03,004 content after", {0, "%Y-%m-%d %H:%M:%S,%3"}, 1'422'752'523'004, + UtcOffset{0}, 0, 23 ); @@ -218,6 +248,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "2015/01/31T15:50:45.123 content after", {0, "%Y/%m/%dT%H:%M:%S.%3"}, 1'422'719'445'123, + UtcOffset{0}, 0, 23 ); @@ -226,6 +257,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "2015/01/31T15:50:45,123 content after", {0, "%Y/%m/%dT%H:%M:%S,%3"}, 1'422'719'445'123, + UtcOffset{0}, 0, 23 ); @@ -234,6 +266,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "2015/01/31 15:50:45.123 content after", {0, "%Y/%m/%d %H:%M:%S.%3"}, 1'422'719'445'123, + UtcOffset{0}, 0, 23 ); @@ -242,6 +275,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "2015/01/31 15:50:45,123 content after", {0, "%Y/%m/%d %H:%M:%S,%3"}, 1'422'719'445'123, + UtcOffset{0}, 0, 23 ); @@ -250,6 +284,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "[2015-02-01 01:02:03,004] content after", {0, "[%Y-%m-%d %H:%M:%S,%3]"}, 1'422'752'523'004, + UtcOffset{0}, 0, 25 ); @@ -258,6 +293,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "INFO [main] 2015-02-01 01:02:03,004 content after", {2, "%Y-%m-%d %H:%M:%S,%3"}, 1'422'752'523'004, + UtcOffset{0}, 12, 35 ); @@ -266,6 +302,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "<<<2015-02-01 01:02:03:004 content after", {0, "<<<%Y-%m-%d %H:%M:%S:%3"}, 1'422'752'523'004, + UtcOffset{0}, 0, 26 ); @@ -274,6 +311,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "01 Feb 2015 01:02:03,004 content after", {0, "%d %b %Y %H:%M:%S,%3"}, 1'422'752'523'004, + UtcOffset{0}, 0, 24 ); @@ -282,6 +320,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "2015-01-31T15:50:45 content after", {0, "%Y-%m-%dT%H:%M:%S"}, 1'422'719'445'000, + UtcOffset{0}, 0, 19 ); @@ -290,6 +329,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "2015-02-01 01:02:03 content after", {0, "%Y-%m-%d %H:%M:%S"}, 1'422'752'523'000, + UtcOffset{0}, 0, 19 ); @@ -298,6 +338,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "2015/01/31T15:50:45 content after", {0, "%Y/%m/%dT%H:%M:%S"}, 1'422'719'445'000, + UtcOffset{0}, 0, 19 ); @@ -306,6 +347,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "2015/02/01 01:02:03 content after", {0, "%Y/%m/%d %H:%M:%S"}, 1'422'752'523'000, + UtcOffset{0}, 0, 19 ); @@ -314,6 +356,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "[2015-02-01T01:02:03 content after", {0, "[%Y-%m-%dT%H:%M:%S"}, 1'422'752'523'000, + UtcOffset{0}, 0, 20 ); @@ -322,6 +365,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "[20150201-01:02:03] content after", {0, "[%Y%m%d-%H:%M:%S]"}, 1'422'752'523'000, + UtcOffset{0}, 0, 19 ); @@ -330,6 +374,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "15/02/01 01:02:03 content after", {0, "%y/%m/%d %H:%M:%S"}, 1'422'752'523'000, + UtcOffset{0}, 0, 17 ); @@ -338,6 +383,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "150201 1:02:03 content after", {0, "%y%m%d %k:%M:%S"}, 1'422'752'523'000, + UtcOffset{0}, 0, 15 ); @@ -346,6 +392,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "Feb 01, 2015 1:02:03 AM content after", {0, "%b %d, %Y %l:%M:%S %p"}, 1'422'752'523'000, + UtcOffset{0}, 0, 24 ); @@ -354,6 +401,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "February 01, 2015 01:02 content after", {0, "%B %d, %Y %H:%M"}, 1'422'752'520'000, + UtcOffset{0}, 0, 23 ); @@ -362,6 +410,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "E [01/Feb/2015:01:02:03 content after", {1, "[%d/%b/%Y:%H:%M:%S"}, 1'422'752'523'000, + UtcOffset{0}, 2, 23 ); @@ -370,6 +419,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "localhost - - [01/Feb/2015:01:02:03 content after", {3, "[%d/%b/%Y:%H:%M:%S"}, 1'422'752'523'000, + UtcOffset{0}, 14, 35 ); @@ -378,6 +428,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "localhost - - [01/02/2015:01:02:03 content after", {3, "[%d/%m/%Y:%H:%M:%S"}, 1'422'752'523'000, + UtcOffset{0}, 14, 34 ); @@ -387,6 +438,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "content after", {6, "%Y-%m-%d %H:%M:%S"}, 1'422'752'523'000, + UtcOffset{0}, 57, 76 ); @@ -395,6 +447,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "update-alternatives 2015-02-01 01:02:03 content after", {1, "%Y-%m-%d %H:%M:%S"}, 1'422'752'523'000, + UtcOffset{0}, 20, 39 ); @@ -403,6 +456,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "ERROR: apport (pid 4557) Sun Feb 1 01:02:03 2015 content after", {4, "%a %b %e %H:%M:%S %Y"}, 1'422'752'523'000, + UtcOffset{0}, 25, 49 ); @@ -411,6 +465,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "Sun Feb 1 01:02:03 2015 content after", {0, "%a %b %e %H:%M:%S %Y"}, 1'422'752'523'000, + UtcOffset{0}, 0, 24 ); @@ -419,6 +474,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "Jan 21 11:56:42", {0, "%b %d %H:%M:%S"}, 1'771'002'000, + UtcOffset{0}, 0, 15 ); @@ -427,11 +483,19 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "01-21 11:56:42.392", {0, "%m-%d %H:%M:%S.%3"}, 1'771'002'392, + UtcOffset{0}, 0, 18 ); - search_and_validate_timestamp_pattern("626515123 content after", {0, "%#3"}, 626'515'123, 0, 9); + search_and_validate_timestamp_pattern( + "626515123 content after", + {0, "%#3"}, + 626'515'123, + UtcOffset{0}, + 0, + 9 + ); // Inputs for the patterns below get recognized as other timestamp patterns, so we can only test // the patterns by specifying them manually. @@ -441,6 +505,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "626515123 content after", {0, "%#6"}, 626'515, + UtcOffset{0}, 0, 9, "626515000 content after" @@ -450,6 +515,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { "626515123 content after", {0, "%#9"}, 626, + UtcOffset{0}, 0, 9, "626000000 content after" From 616fd7a49a84d8af52ed374a9ddaaef9bfbbae4b Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Fri, 29 Mar 2024 19:52:08 -0400 Subject: [PATCH 03/11] Add support for binding blobs to SQLite prepared statements. --- .../core/src/clp/SQLitePreparedStatement.cpp | 36 +++++++++++++++++++ .../core/src/clp/SQLitePreparedStatement.hpp | 7 ++++ 2 files changed, 43 insertions(+) diff --git a/components/core/src/clp/SQLitePreparedStatement.cpp b/components/core/src/clp/SQLitePreparedStatement.cpp index 93a34ec0b4..a8f4af944f 100644 --- a/components/core/src/clp/SQLitePreparedStatement.cpp +++ b/components/core/src/clp/SQLitePreparedStatement.cpp @@ -106,6 +106,42 @@ void SQLitePreparedStatement::bind_int64(string const& parameter_name, int64_t v bind_int64(parameter_index, value); } +void SQLitePreparedStatement::bind_blob64( + int parameter_index, + void* value, + size_t value_size, + bool copy_parameter +) { + auto return_value = sqlite3_bind_blob64( + m_statement_handle, + parameter_index, + value, + value_size, + copy_parameter ? SQLITE_TRANSIENT : SQLITE_STATIC + ); + if (SQLITE_OK != return_value) { + SPDLOG_ERROR( + "SQLitePreparedStatement: Failed to bind blob64 to statement - {}", + sqlite3_errmsg(m_db_handle) + ); + throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); + } +} + +void SQLitePreparedStatement::bind_blob64( + std::string const& parameter_name, + void* value, + size_t value_size, + bool copy_parameter +) { + auto parameter_index = sqlite3_bind_parameter_index(m_statement_handle, parameter_name.c_str()); + if (0 == parameter_index) { + throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); + } + + bind_blob64(parameter_index, value, value_size, copy_parameter); +} + void SQLitePreparedStatement::bind_text( int parameter_index, std::string const& value, diff --git a/components/core/src/clp/SQLitePreparedStatement.hpp b/components/core/src/clp/SQLitePreparedStatement.hpp index 7cb7152c1b..f11f8e3ca1 100644 --- a/components/core/src/clp/SQLitePreparedStatement.hpp +++ b/components/core/src/clp/SQLitePreparedStatement.hpp @@ -41,6 +41,13 @@ class SQLitePreparedStatement { void bind_int(std::string const& parameter_name, int value); void bind_int64(int parameter_index, int64_t value); void bind_int64(std::string const& parameter_name, int64_t value); + void bind_blob64(int parameter_index, void* value, size_t value_size, bool copy_parameter); + void bind_blob64( + std::string const& parameter_name, + void* value, + size_t value_size, + bool copy_parameter + ); void bind_text(int parameter_index, std::string const& value, bool copy_parameter); void bind_text(std::string const& parameter_name, std::string const& value, bool copy_parameter); From 9aeac48ff6b444e147edbeb4d0735e051148d0e1 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Fri, 29 Mar 2024 21:15:29 -0400 Subject: [PATCH 04/11] Add support for getting blobs from SQLite prepared statements. --- .../core/src/clp/SQLitePreparedStatement.cpp | 29 +++++++++++++++++++ .../core/src/clp/SQLitePreparedStatement.hpp | 3 ++ 2 files changed, 32 insertions(+) diff --git a/components/core/src/clp/SQLitePreparedStatement.cpp b/components/core/src/clp/SQLitePreparedStatement.cpp index a8f4af944f..c5d89a5138 100644 --- a/components/core/src/clp/SQLitePreparedStatement.cpp +++ b/components/core/src/clp/SQLitePreparedStatement.cpp @@ -237,6 +237,35 @@ int64_t SQLitePreparedStatement::column_int64(string const& parameter_name) cons return column_int64(parameter_index); } +void SQLitePreparedStatement::column_blob( + int parameter_index, + void const*& value, + size_t& value_size +) const { + if (false == m_row_ready) { + throw OperationFailed(ErrorCode_NotReady, __FILENAME__, __LINE__); + } + + value = sqlite3_column_blob(m_statement_handle, parameter_index); + value_size = sqlite3_column_bytes(m_statement_handle, parameter_index); +} + +void SQLitePreparedStatement::column_blob( + std::string const& parameter_name, + void const*& value, + size_t& value_size +) const { + if (false == m_row_ready) { + throw OperationFailed(ErrorCode_NotReady, __FILENAME__, __LINE__); + } + auto parameter_index = sqlite3_bind_parameter_index(m_statement_handle, parameter_name.c_str()); + if (0 == parameter_index) { + throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); + } + + column_blob(parameter_index, value, value_size); +} + void SQLitePreparedStatement::column_string(int parameter_index, std::string& value) const { if (false == m_row_ready) { throw OperationFailed(ErrorCode_NotReady, __FILENAME__, __LINE__); diff --git a/components/core/src/clp/SQLitePreparedStatement.hpp b/components/core/src/clp/SQLitePreparedStatement.hpp index f11f8e3ca1..0903277dd3 100644 --- a/components/core/src/clp/SQLitePreparedStatement.hpp +++ b/components/core/src/clp/SQLitePreparedStatement.hpp @@ -58,6 +58,9 @@ class SQLitePreparedStatement { int column_int(std::string const& parameter_name) const; int64_t column_int64(int parameter_index) const; int64_t column_int64(std::string const& parameter_name) const; + void column_blob(int parameter_index, void const*& value, size_t& value_size) const; + void + column_blob(std::string const& parameter_name, void const*& value, size_t& value_size) const; void column_string(int parameter_index, std::string& value) const; void column_string(std::string const& parameter_name, std::string& value) const; From b8d1a275a2a6bd24544f1f7be678c34dd9375939 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Sat, 30 Mar 2024 21:56:54 -0400 Subject: [PATCH 05/11] Move UtcOffset into time_types.hpp --- components/core/CMakeLists.txt | 1 + components/core/src/clp/Defs.h | 2 -- components/core/src/clp/MessageParser.cpp | 1 + components/core/src/clp/ParsedMessage.cpp | 2 ++ components/core/src/clp/ParsedMessage.hpp | 1 + components/core/src/clp/TimestampPattern.cpp | 1 + components/core/src/clp/TimestampPattern.hpp | 1 + components/core/src/clp/clg/CMakeLists.txt | 1 + components/core/src/clp/clo/CMakeLists.txt | 1 + components/core/src/clp/clp/CMakeLists.txt | 1 + .../core/src/clp/streaming_archive/reader/Archive.cpp | 1 + .../core/src/clp/streaming_archive/writer/Archive.cpp | 1 + components/core/src/clp/time_types.hpp | 11 +++++++++++ components/core/tests/test-TimestampPattern.cpp | 1 + 14 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 components/core/src/clp/time_types.hpp diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index 3ec2b9bca6..cec4839d0c 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -352,6 +352,7 @@ set(SOURCE_FILES_unitTest src/clp/streaming_compression/zstd/Decompressor.hpp src/clp/StringReader.cpp src/clp/StringReader.hpp + src/clp/time_types.hpp src/clp/TimestampPattern.cpp src/clp/TimestampPattern.hpp src/clp/TraceableException.hpp diff --git a/components/core/src/clp/Defs.h b/components/core/src/clp/Defs.h index 5221a12596..231b66d5ef 100644 --- a/components/core/src/clp/Defs.h +++ b/components/core/src/clp/Defs.h @@ -11,8 +11,6 @@ namespace clp { typedef int64_t epochtime_t; constexpr epochtime_t cEpochTimeMin = std::numeric_limits::min(); constexpr epochtime_t cEpochTimeMax = std::numeric_limits::max(); -// We use seconds resolution to support GPS clock offsets -using UtcOffset = std::chrono::seconds; typedef uint64_t variable_dictionary_id_t; constexpr variable_dictionary_id_t cVariableDictionaryIdMax diff --git a/components/core/src/clp/MessageParser.cpp b/components/core/src/clp/MessageParser.cpp index baa3830b87..c95c64902a 100644 --- a/components/core/src/clp/MessageParser.cpp +++ b/components/core/src/clp/MessageParser.cpp @@ -3,6 +3,7 @@ #include #include "Defs.h" +#include "time_types.hpp" #include "TimestampPattern.hpp" constexpr char cLineDelimiter = '\n'; diff --git a/components/core/src/clp/ParsedMessage.cpp b/components/core/src/clp/ParsedMessage.cpp index b373ff013f..c725873a30 100644 --- a/components/core/src/clp/ParsedMessage.cpp +++ b/components/core/src/clp/ParsedMessage.cpp @@ -1,5 +1,7 @@ #include "ParsedMessage.hpp" +#include "time_types.hpp" + using std::string; namespace clp { diff --git a/components/core/src/clp/ParsedMessage.hpp b/components/core/src/clp/ParsedMessage.hpp index d6a26b394f..87ed5b5fc5 100644 --- a/components/core/src/clp/ParsedMessage.hpp +++ b/components/core/src/clp/ParsedMessage.hpp @@ -3,6 +3,7 @@ #include +#include "time_types.hpp" #include "TimestampPattern.hpp" namespace clp { diff --git a/components/core/src/clp/TimestampPattern.cpp b/components/core/src/clp/TimestampPattern.cpp index 9d31b72cfa..d06019d222 100644 --- a/components/core/src/clp/TimestampPattern.cpp +++ b/components/core/src/clp/TimestampPattern.cpp @@ -7,6 +7,7 @@ #include #include "spdlog_with_specializations.hpp" +#include "time_types.hpp" using std::string; using std::to_string; diff --git a/components/core/src/clp/TimestampPattern.hpp b/components/core/src/clp/TimestampPattern.hpp index 14b02db25d..7a62e58071 100644 --- a/components/core/src/clp/TimestampPattern.hpp +++ b/components/core/src/clp/TimestampPattern.hpp @@ -7,6 +7,7 @@ #include "Defs.h" #include "FileWriter.hpp" +#include "time_types.hpp" #include "TraceableException.hpp" namespace clp { diff --git a/components/core/src/clp/clg/CMakeLists.txt b/components/core/src/clp/clg/CMakeLists.txt index b19712f7bc..60ad3f0f5b 100644 --- a/components/core/src/clp/clg/CMakeLists.txt +++ b/components/core/src/clp/clg/CMakeLists.txt @@ -96,6 +96,7 @@ set( ../streaming_compression/zstd/Decompressor.hpp ../StringReader.cpp ../StringReader.hpp + ../time_types.hpp ../TimestampPattern.cpp ../TimestampPattern.hpp ../TraceableException.hpp diff --git a/components/core/src/clp/clo/CMakeLists.txt b/components/core/src/clp/clo/CMakeLists.txt index f5f6f99b77..f2803403a6 100644 --- a/components/core/src/clp/clo/CMakeLists.txt +++ b/components/core/src/clp/clo/CMakeLists.txt @@ -90,6 +90,7 @@ set( ../StringReader.hpp ../Thread.cpp ../Thread.hpp + ../time_types.hpp ../TimestampPattern.cpp ../TimestampPattern.hpp ../TraceableException.hpp diff --git a/components/core/src/clp/clp/CMakeLists.txt b/components/core/src/clp/clp/CMakeLists.txt index dc1a9038a9..27360af937 100644 --- a/components/core/src/clp/clp/CMakeLists.txt +++ b/components/core/src/clp/clp/CMakeLists.txt @@ -119,6 +119,7 @@ set( ../streaming_compression/zstd/Decompressor.hpp ../StringReader.cpp ../StringReader.hpp + ../time_types.hpp ../TimestampPattern.cpp ../TimestampPattern.hpp ../TraceableException.hpp diff --git a/components/core/src/clp/streaming_archive/reader/Archive.cpp b/components/core/src/clp/streaming_archive/reader/Archive.cpp index f7770edc0b..3185fd53a5 100644 --- a/components/core/src/clp/streaming_archive/reader/Archive.cpp +++ b/components/core/src/clp/streaming_archive/reader/Archive.cpp @@ -10,6 +10,7 @@ #include "../../EncodedVariableInterpreter.hpp" #include "../../spdlog_with_specializations.hpp" +#include "../../time_types.hpp" #include "../../Utils.hpp" #include "../ArchiveMetadata.hpp" #include "../Constants.hpp" diff --git a/components/core/src/clp/streaming_archive/writer/Archive.cpp b/components/core/src/clp/streaming_archive/writer/Archive.cpp index f600fd5b2a..386d1b052f 100644 --- a/components/core/src/clp/streaming_archive/writer/Archive.cpp +++ b/components/core/src/clp/streaming_archive/writer/Archive.cpp @@ -17,6 +17,7 @@ #include "../../EncodedVariableInterpreter.hpp" #include "../../ir/types.hpp" #include "../../spdlog_with_specializations.hpp" +#include "../../time_types.hpp" #include "../../Utils.hpp" #include "../Constants.hpp" #include "utils.hpp" diff --git a/components/core/src/clp/time_types.hpp b/components/core/src/clp/time_types.hpp new file mode 100644 index 0000000000..7197b05f46 --- /dev/null +++ b/components/core/src/clp/time_types.hpp @@ -0,0 +1,11 @@ +#ifndef CLP_TIME_TYPES_HPP +#define CLP_TIME_TYPES_HPP + +#include + +namespace clp { +// We use seconds resolution to support GPS clock offsets +using UtcOffset = std::chrono::seconds; +} // namespace clp + +#endif // CLP_TIME_TYPES_HPP diff --git a/components/core/tests/test-TimestampPattern.cpp b/components/core/tests/test-TimestampPattern.cpp index 90a9c22696..e0609314da 100644 --- a/components/core/tests/test-TimestampPattern.cpp +++ b/components/core/tests/test-TimestampPattern.cpp @@ -1,5 +1,6 @@ #include +#include "../src/clp/time_types.hpp" #include "../src/clp/TimestampPattern.hpp" using clp::epochtime_t; From 92e56c3861801b7d11610b8293ea6870236c4c1e Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Sat, 30 Mar 2024 23:38:47 -0400 Subject: [PATCH 06/11] Add support for new UtcOffsetChange packet to IR streams. --- .../clp/ffi/ir_stream/decoding_methods.cpp | 37 ++++++-- .../clp/ffi/ir_stream/decoding_methods.hpp | 24 ++++- .../clp/ffi/ir_stream/encoding_methods.cpp | 8 ++ .../clp/ffi/ir_stream/encoding_methods.hpp | 8 ++ .../clp/ffi/ir_stream/protocol_constants.hpp | 2 + .../core/src/clp/ir/LogEventDeserializer.cpp | 27 +++++- .../core/src/clp/ir/LogEventDeserializer.hpp | 6 +- .../core/tests/test-ir_encoding_methods.cpp | 90 +++++++++++++++++++ 8 files changed, 187 insertions(+), 15 deletions(-) diff --git a/components/core/src/clp/ffi/ir_stream/decoding_methods.cpp b/components/core/src/clp/ffi/ir_stream/decoding_methods.cpp index e12c6d48fb..9532a1c1bb 100644 --- a/components/core/src/clp/ffi/ir_stream/decoding_methods.cpp +++ b/components/core/src/clp/ffi/ir_stream/decoding_methods.cpp @@ -272,13 +272,21 @@ static IRErrorCode generic_deserialize_log_event( string& message, epoch_time_ms_t& timestamp ) { + encoded_tag_t encoded_tag{cProtocol::Eof}; + if (ErrorCode_Success != reader.try_read_numeric_value(encoded_tag)) { + return IRErrorCode_Incomplete_IR; + } + if (cProtocol::Eof == encoded_tag) { + return IRErrorCode_Eof; + } + message.clear(); vector encoded_vars; vector dict_vars; string logtype; if (auto error_code - = deserialize_log_event(reader, logtype, encoded_vars, dict_vars, timestamp); + = deserialize_log_event(reader, encoded_tag, logtype, encoded_vars, dict_vars, timestamp); IRErrorCode_Success != error_code) { return error_code; @@ -351,19 +359,12 @@ static IRErrorCode deserialize_metadata( template auto deserialize_log_event( ReaderInterface& reader, + encoded_tag_t encoded_tag, string& logtype, vector& encoded_vars, vector& dict_vars, epoch_time_ms_t& timestamp_or_timestamp_delta ) -> IRErrorCode { - encoded_tag_t encoded_tag{cProtocol::Eof}; - if (ErrorCode_Success != reader.try_read_numeric_value(encoded_tag)) { - return IRErrorCode_Incomplete_IR; - } - if (cProtocol::Eof == encoded_tag) { - return IRErrorCode_Eof; - } - // Handle variables string var_str; bool is_encoded_var{false}; @@ -433,6 +434,13 @@ IRErrorCode get_encoding_type(ReaderInterface& reader, bool& is_four_bytes_encod return IRErrorCode_Success; } +IRErrorCode deserialize_tag(ReaderInterface& reader, encoded_tag_t& tag) { + if (ErrorCode_Success != reader.try_read_numeric_value(tag)) { + return IRErrorCode_Incomplete_IR; + } + return IRErrorCode_Success; +} + IRErrorCode deserialize_preamble( ReaderInterface& reader, encoded_tag_t& metadata_type, @@ -503,6 +511,15 @@ IRProtocolErrorCode validate_protocol_version(std::string_view protocol_version) return IRProtocolErrorCode_Supported; } +IRErrorCode deserialize_utc_offset_change(ReaderInterface& reader, UtcOffset& utc_offset) { + int64_t serialized_utc_offset{}; + if (false == deserialize_int(reader, serialized_utc_offset)) { + return IRErrorCode_Incomplete_IR; + } + utc_offset = UtcOffset{serialized_utc_offset}; + return IRErrorCode_Success; +} + namespace four_byte_encoding { IRErrorCode deserialize_log_event(ReaderInterface& reader, string& message, epoch_time_ms_t& timestamp_delta) { @@ -524,6 +541,7 @@ deserialize_log_event(ReaderInterface& reader, string& message, epoch_time_ms_t& // Explicitly declare specializations template auto deserialize_log_event( ReaderInterface& reader, + encoded_tag_t encoded_tag, string& logtype, vector& encoded_vars, vector& dict_vars, @@ -532,6 +550,7 @@ template auto deserialize_log_event( template auto deserialize_log_event( ReaderInterface& reader, + encoded_tag_t encoded_tag, string& logtype, vector& encoded_vars, vector& dict_vars, diff --git a/components/core/src/clp/ffi/ir_stream/decoding_methods.hpp b/components/core/src/clp/ffi/ir_stream/decoding_methods.hpp index 199ba39d21..52cdeb2998 100644 --- a/components/core/src/clp/ffi/ir_stream/decoding_methods.hpp +++ b/components/core/src/clp/ffi/ir_stream/decoding_methods.hpp @@ -6,6 +6,7 @@ #include "../../ir/types.hpp" #include "../../ReaderInterface.hpp" +#include "../../time_types.hpp" #include "../encoding_methods.hpp" namespace clp::ffi::ir_stream { @@ -55,10 +56,20 @@ class DecodingException : public TraceableException { */ IRErrorCode get_encoding_type(ReaderInterface& reader, bool& is_four_bytes_encoding); +/** + * Deserializes the tag for the next packet. + * @param reader + * @param tag Returns the tag of the next packet. + * @return IRErrorCode_Success on success + * @return IRErrorCode_Incomplete_IR if reader doesn't contain enough data to deserialize + */ +IRErrorCode deserialize_tag(ReaderInterface& reader, encoded_tag_t& tag); + /** * Deserializes a log event from the given stream * @tparam encoded_variable_t * @param reader + * @param encoded_tag Tag of the next packet to read * @param logtype Returns the logtype * @param encoded_vars Returns the encoded variables * @param dict_vars Returns the dictionary variables @@ -67,11 +78,11 @@ IRErrorCode get_encoding_type(ReaderInterface& reader, bool& is_four_bytes_encod * @return IRErrorCode_Success on success * @return IRErrorCode_Corrupted_IR if reader contains invalid IR * @return IRErrorCode_Incomplete_IR if reader doesn't contain enough data - * @return IRErrorCode_Eof on reaching the end of the stream */ template auto deserialize_log_event( ReaderInterface& reader, + encoded_tag_t encoded_tag, std::string& logtype, std::vector& encoded_vars, std::vector& dict_vars, @@ -162,6 +173,15 @@ IRErrorCode deserialize_preamble( */ IRProtocolErrorCode validate_protocol_version(std::string_view protocol_version); +/** + * Deserializes a UTC offset change packet. + * @param reader + * @param utc_offset The deserialized UTC offset. + * @return IRErrorCode_Success on success + * @return IRErrorCode_Incomplete_IR if reader doesn't contain enough data to deserialize + */ +IRErrorCode deserialize_utc_offset_change(ReaderInterface& reader, UtcOffset& utc_offset); + namespace eight_byte_encoding { /** * Deserializes the next log event from an eight-byte encoding IR stream. @@ -172,7 +192,6 @@ namespace eight_byte_encoding { * @return ErrorCode_Corrupted_IR if reader contains invalid IR * @return ErrorCode_Decode_Error if the log event cannot be properly deserialized * @return ErrorCode_Incomplete_IR if reader doesn't contain enough data to deserialize - * @return ErrorCode_End_of_IR if the IR ends */ IRErrorCode deserialize_log_event( ReaderInterface& reader, @@ -191,7 +210,6 @@ namespace four_byte_encoding { * @return ErrorCode_Corrupted_IR if reader contains invalid IR * @return ErrorCode_Decode_Error if the log event cannot be properly deserialized * @return ErrorCode_Incomplete_IR if reader doesn't contain enough data to deserialize - * @return ErrorCode_End_of_IR if the IR ends */ IRErrorCode deserialize_log_event( ReaderInterface& reader, diff --git a/components/core/src/clp/ffi/ir_stream/encoding_methods.cpp b/components/core/src/clp/ffi/ir_stream/encoding_methods.cpp index bf14c4707e..acaa603b59 100644 --- a/components/core/src/clp/ffi/ir_stream/encoding_methods.cpp +++ b/components/core/src/clp/ffi/ir_stream/encoding_methods.cpp @@ -4,6 +4,7 @@ #include "../../ir/parsing.hpp" #include "../../ir/types.hpp" +#include "../../time_types.hpp" #include "byteswap.hpp" #include "protocol_constants.hpp" @@ -306,4 +307,11 @@ bool serialize_timestamp(epoch_time_ms_t timestamp_delta, std::vector& i return true; } } // namespace four_byte_encoding + +void serialize_utc_offset_change(UtcOffset utc_offset, std::vector& ir_buf) { + ir_buf.emplace_back(cProtocol::Payload::UtcOffsetChange); + // TODO Decide whether we want to use milliseconds for the UTC offset. If not, we can drop the + // static cast. + serialize_int(static_cast(utc_offset.count()), ir_buf); +} } // namespace clp::ffi::ir_stream diff --git a/components/core/src/clp/ffi/ir_stream/encoding_methods.hpp b/components/core/src/clp/ffi/ir_stream/encoding_methods.hpp index 542a143574..d1b15b2b42 100644 --- a/components/core/src/clp/ffi/ir_stream/encoding_methods.hpp +++ b/components/core/src/clp/ffi/ir_stream/encoding_methods.hpp @@ -5,6 +5,7 @@ #include #include "../../ir/types.hpp" +#include "../../time_types.hpp" #include "../encoding_methods.hpp" namespace clp::ffi::ir_stream { @@ -91,6 +92,13 @@ bool serialize_message(std::string_view message, std::string& logtype, std::vect */ bool serialize_timestamp(ir::epoch_time_ms_t timestamp_delta, std::vector& ir_buf); } // namespace four_byte_encoding + +/** + * Serializes the given UTC offset into the IR stream. + * @param utc_offset + * @param ir_buf + */ +void serialize_utc_offset_change(UtcOffset utc_offset, std::vector& ir_buf); } // namespace clp::ffi::ir_stream #endif // CLP_FFI_IR_STREAM_ENCODING_METHODS_HPP diff --git a/components/core/src/clp/ffi/ir_stream/protocol_constants.hpp b/components/core/src/clp/ffi/ir_stream/protocol_constants.hpp index f122557f83..7d777ac5d8 100644 --- a/components/core/src/clp/ffi/ir_stream/protocol_constants.hpp +++ b/components/core/src/clp/ffi/ir_stream/protocol_constants.hpp @@ -47,6 +47,8 @@ constexpr int8_t TimestampDeltaByte = 0x31; constexpr int8_t TimestampDeltaShort = 0x32; constexpr int8_t TimestampDeltaInt = 0x33; constexpr int8_t TimestampDeltaLong = 0x34; + +constexpr int8_t UtcOffsetChange = 0x41; } // namespace Payload constexpr int8_t FourByteEncodingMagicNumber[] diff --git a/components/core/src/clp/ir/LogEventDeserializer.cpp b/components/core/src/clp/ir/LogEventDeserializer.cpp index 6ab643142e..1011e3d024 100644 --- a/components/core/src/clp/ir/LogEventDeserializer.cpp +++ b/components/core/src/clp/ir/LogEventDeserializer.cpp @@ -6,6 +6,7 @@ #include #include "../ffi/ir_stream/decoding_methods.hpp" +#include "../ffi/ir_stream/protocol_constants.hpp" #include "types.hpp" namespace clp::ir { @@ -68,6 +69,29 @@ auto LogEventDeserializer::create(ReaderInterface& reader) template auto LogEventDeserializer::deserialize_log_event() -> BOOST_OUTCOME_V2_NAMESPACE::std_result> { + // Process any packets before the log event + ffi::ir_stream::encoded_tag_t tag{}; + while (true) { + auto ir_error_code = ffi::ir_stream::deserialize_tag(m_reader, tag); + if (ffi::ir_stream::IRErrorCode_Incomplete_IR == ir_error_code) { + return std::errc::result_out_of_range; + } + + if (ffi::ir_stream::cProtocol::Eof == tag) { + return std::errc::no_message_available; + } + + if (ffi::ir_stream::cProtocol::Payload::UtcOffsetChange == tag) { + ir_error_code = ffi::ir_stream::deserialize_utc_offset_change(m_reader, m_utc_offset); + if (ffi::ir_stream::IRErrorCode_Incomplete_IR == ir_error_code) { + return std::errc::result_out_of_range; + } + } else { + // Packet must be a log event + break; + } + } + epoch_time_ms_t timestamp_or_timestamp_delta{}; std::string logtype; std::vector dict_vars; @@ -75,6 +99,7 @@ auto LogEventDeserializer::deserialize_log_event() auto ir_error_code = ffi::ir_stream::deserialize_log_event( m_reader, + tag, logtype, encoded_vars, dict_vars, @@ -82,8 +107,6 @@ auto LogEventDeserializer::deserialize_log_event() ); if (ffi::ir_stream::IRErrorCode_Success != ir_error_code) { switch (ir_error_code) { - case ffi::ir_stream::IRErrorCode_Eof: - return std::errc::no_message_available; case ffi::ir_stream::IRErrorCode_Incomplete_IR: return std::errc::result_out_of_range; case ffi::ir_stream::IRErrorCode_Corrupted_IR: diff --git a/components/core/src/clp/ir/LogEventDeserializer.hpp b/components/core/src/clp/ir/LogEventDeserializer.hpp index e6f43aca6d..b2e9463e73 100644 --- a/components/core/src/clp/ir/LogEventDeserializer.hpp +++ b/components/core/src/clp/ir/LogEventDeserializer.hpp @@ -6,6 +6,7 @@ #include #include "../ReaderInterface.hpp" +#include "../time_types.hpp" #include "../TimestampPattern.hpp" #include "../TraceableException.hpp" #include "../type_utils.hpp" @@ -51,12 +52,14 @@ class LogEventDeserializer { return m_timestamp_pattern; } + [[nodiscard]] auto get_current_utc_offset() const -> UtcOffset { return m_utc_offset; } + /** * Deserializes a log event from the stream * @return A result containing the log event or an error code indicating the failure: * - std::errc::no_message_available on reaching the end of the IR stream * - std::errc::result_out_of_range if the IR stream is truncated - * - std::errc::result_out_of_range if the IR stream is corrupted + * - std::errc::protocol_error if the IR stream is corrupted */ [[nodiscard]] auto deserialize_log_event() -> BOOST_OUTCOME_V2_NAMESPACE::std_result>; @@ -71,6 +74,7 @@ class LogEventDeserializer { // Variables TimestampPattern m_timestamp_pattern{0, "%Y-%m-%dT%H:%M:%S.%3"}; + UtcOffset m_utc_offset{0}; [[no_unique_address]] std::conditional_t< std::is_same_v, epoch_time_ms_t, diff --git a/components/core/tests/test-ir_encoding_methods.cpp b/components/core/tests/test-ir_encoding_methods.cpp index c9164eb8a5..5040c930af 100644 --- a/components/core/tests/test-ir_encoding_methods.cpp +++ b/components/core/tests/test-ir_encoding_methods.cpp @@ -6,6 +6,7 @@ #include "../src/clp/ffi/ir_stream/decoding_methods.hpp" #include "../src/clp/ffi/ir_stream/encoding_methods.hpp" #include "../src/clp/ffi/ir_stream/protocol_constants.hpp" +#include "../src/clp/ir/LogEventDeserializer.hpp" #include "../src/clp/ir/types.hpp" using clp::BufferReader; @@ -28,6 +29,7 @@ using clp::ffi::wildcard_query_matches_any_encoded_var; using clp::ir::eight_byte_encoded_variable_t; using clp::ir::epoch_time_ms_t; using clp::ir::four_byte_encoded_variable_t; +using clp::ir::LogEventDeserializer; using clp::ir::VariablePlaceholder; using clp::size_checked_pointer_cast; using std::chrono::duration_cast; @@ -643,3 +645,91 @@ TEMPLATE_TEST_CASE( } REQUIRE(complete_ir_buffer.get_pos() == ir_buf.size()); } + +// TODO Deduplicate parts copied from the test above. +TEMPLATE_TEST_CASE( + "clp::ir::LogEventDeserializer", + "[clp][ir][LogEventDeserializer]", + four_byte_encoded_variable_t, + eight_byte_encoded_variable_t +) { + vector ir_buf; + string logtype; + + epoch_time_ms_t preamble_ts = get_current_ts(); + constexpr char timestamp_pattern[] = "%Y-%m-%d %H:%M:%S,%3"; + constexpr char timestamp_pattern_syntax[] = "yyyy-MM-dd HH:mm:ss"; + constexpr char time_zone_id[] = "Asia/Tokyo"; + REQUIRE(serialize_preamble( + timestamp_pattern, + timestamp_pattern_syntax, + time_zone_id, + preamble_ts, + ir_buf + )); + size_t const encoded_preamble_end_pos = ir_buf.size(); + + string message; + epoch_time_ms_t ts; + vector reference_messages; + vector reference_timestamps; + vector reference_utc_offsets; + + clp::UtcOffset utc_offset{0}; + reference_utc_offsets.emplace_back(utc_offset); + + // First message + message = "Static <\text>, dictVar1, 123, 456.7, dictVar2, 987, 654.3, end of static text"; + ts = get_next_timestamp_for_test(); + REQUIRE(encode_message(ts, message, logtype, ir_buf)); + reference_messages.push_back(message); + reference_timestamps.push_back(ts); + + utc_offset = clp::UtcOffset{std::chrono::seconds{-5 * 60 * 60}}; + reference_utc_offsets.emplace_back(utc_offset); + clp::ffi::ir_stream::serialize_utc_offset_change(utc_offset, ir_buf); + + // Second message + message = "Static <\text>, dictVar3, 355.2352512, 23953324532112, " + "python3.4.6, end of static text"; + ts = get_next_timestamp_for_test(); + REQUIRE(encode_message(ts, message, logtype, ir_buf)); + reference_messages.push_back(message); + reference_timestamps.push_back(ts); + + utc_offset = clp::UtcOffset{std::chrono::seconds{5 * 60 * 60}}; + reference_utc_offsets.emplace_back(utc_offset); + clp::ffi::ir_stream::serialize_utc_offset_change(utc_offset, ir_buf); + + // Third message + message = "Static <\text>, dictVar3, 355.2352512, 23953324532112, " + "python3.4.6, end of static text"; + ts = get_next_timestamp_for_test(); + REQUIRE(encode_message(ts, message, logtype, ir_buf)); + reference_messages.push_back(message); + reference_timestamps.push_back(ts); + + ir_buf.push_back(clp::ffi::ir_stream::cProtocol::Eof); + + BufferReader complete_ir_buffer{ + size_checked_pointer_cast(ir_buf.data()), + ir_buf.size() + }; + + bool is_four_bytes_encoding; + REQUIRE(get_encoding_type(complete_ir_buffer, is_four_bytes_encoding) + == IRErrorCode::IRErrorCode_Success); + REQUIRE(match_encoding_type(is_four_bytes_encoding)); + + auto create_result = LogEventDeserializer::create(complete_ir_buffer); + REQUIRE(false == create_result.has_error()); + auto& log_event_deserializer = create_result.value(); + for (size_t ix = 0; ix < reference_messages.size(); ix++) { + auto result = log_event_deserializer.deserialize_log_event(); + REQUIRE(false == result.has_error()); + REQUIRE(log_event_deserializer.get_current_utc_offset() == reference_utc_offsets[ix]); + } + auto result = log_event_deserializer.deserialize_log_event(); + REQUIRE(result.has_error()); + REQUIRE(std::errc::no_message_available == result.error()); +} From a980346a3eb4a65ef70daab19d13f5cd2beefb82 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Sun, 31 Mar 2024 00:22:50 -0400 Subject: [PATCH 07/11] Add support for parsing UTC offsets of the form +/-hhmm and storing them in the archive's metadata. --- components/core/src/clp/TimestampPattern.cpp | 78 ++++++++++++++++++- components/core/src/clp/TimestampPattern.hpp | 1 + .../core/src/clp/clp/FileCompressor.cpp | 15 ++++ .../src/clp/streaming_archive/Constants.hpp | 1 + .../src/clp/streaming_archive/MetadataDB.cpp | 31 ++++++++ .../src/clp/streaming_archive/MetadataDB.hpp | 6 ++ .../clp/streaming_archive/reader/Archive.cpp | 33 +++----- .../src/clp/streaming_archive/reader/File.cpp | 65 ++++++++++++++-- .../src/clp/streaming_archive/reader/File.hpp | 23 +++++- .../clp/streaming_archive/writer/Archive.cpp | 8 ++ .../clp/streaming_archive/writer/Archive.hpp | 7 ++ .../src/clp/streaming_archive/writer/File.cpp | 25 ++++++ .../src/clp/streaming_archive/writer/File.hpp | 11 +++ .../clp/streaming_archive/writer/utils.cpp | 8 ++ .../core/tests/test-TimestampPattern.cpp | 18 +++++ 15 files changed, 301 insertions(+), 29 deletions(-) diff --git a/components/core/src/clp/TimestampPattern.cpp b/components/core/src/clp/TimestampPattern.cpp index d06019d222..7f8da0977c 100644 --- a/components/core/src/clp/TimestampPattern.cpp +++ b/components/core/src/clp/TimestampPattern.cpp @@ -121,6 +121,10 @@ namespace clp { void TimestampPattern::init() { // First create vector of observed patterns so that it's easy to maintain vector patterns; + // E.g. 2015-01-31T15:50:45.392-0500 + patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%S.%3%z"); + // E.g. 2015-01-31T15:50:45,392-0500 + patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%S,%3%z"); // E.g. 2015-01-31T15:50:45.392 patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%S.%3"); // E.g. 2015-01-31T15:50:45,392 @@ -649,6 +653,62 @@ bool TimestampPattern::parse_timestamp( case '#': state = ParserState::RelativeTimestampUnit; break; + case 'z': { + constexpr int cFieldLength = 5; + if (line_ix + cFieldLength > line_length) { + // Too short + return false; + } + + char sign = line[line_ix]; + if ('+' != sign && '-' != sign) { + // Not a sign + return false; + } + bool is_negative = ('-' == sign); + ++line_ix; + + int value; + if (false + == convert_string_to_number( + line, + line_ix, + line_ix + 2, + '0', + value + ) + || value < 0 || value > 14) + { + return false; + } + std::chrono::hours utc_offset_hours{value}; + line_ix += 2; + + if (false + == convert_string_to_number( + line, + line_ix, + line_ix + 2, + '0', + value + ) + || value < 0 || value > 59) + { + return false; + } + std::chrono::minutes utc_offset_minutes{value}; + line_ix += 2; + + utc_offset = utc_offset_hours + utc_offset_minutes; + if (is_negative) { + if (utc_offset.count() == 0) { + // -0000 is not a valid UTC offset + return false; + } + utc_offset *= -1; + } + break; + } default: return false; } @@ -746,7 +806,6 @@ bool TimestampPattern::parse_timestamp( auto duration_since_epoch = timestamp_point - unix_epoch_point; // Convert to raw milliseconds timestamp = std::chrono::duration_cast(duration_since_epoch).count(); - utc_offset = std::chrono::seconds{0}; timestamp_begin_pos = ts_begin_ix; timestamp_end_pos = line_ix; @@ -904,6 +963,23 @@ void TimestampPattern::insert_formatted_timestamp( case '#': // Relative timestamp state = ParserState::RelativeTimestampUnit; break; + case 'z': { // UTC offset + auto utc_offset_hours + = std::chrono::duration_cast(utc_offset); + auto utc_offset_minutes = std::chrono::duration_cast( + utc_offset - utc_offset_hours + ); + if (utc_offset_hours.count() >= 0) { + new_msg += '+'; + } else { + new_msg += '-'; + utc_offset_hours *= -1; + utc_offset_minutes *= -1; + } + append_padded_value(utc_offset_hours.count(), '0', 2, new_msg); + append_padded_value(utc_offset_minutes.count(), '0', 2, new_msg); + break; + } default: throw OperationFailed(ErrorCode_Unsupported, __FILENAME__, __LINE__); } diff --git a/components/core/src/clp/TimestampPattern.hpp b/components/core/src/clp/TimestampPattern.hpp index 7a62e58071..65c79627e2 100644 --- a/components/core/src/clp/TimestampPattern.hpp +++ b/components/core/src/clp/TimestampPattern.hpp @@ -46,6 +46,7 @@ namespace clp { * - 3 Milliseconds * - 6 Microseconds * - 9 Nanoseconds + * - z A UTC offset of the form [+-]hhmm */ class TimestampPattern { public: diff --git a/components/core/src/clp/clp/FileCompressor.cpp b/components/core/src/clp/clp/FileCompressor.cpp index c91571efdd..bcd0a4e999 100644 --- a/components/core/src/clp/clp/FileCompressor.cpp +++ b/components/core/src/clp/clp/FileCompressor.cpp @@ -16,6 +16,7 @@ #include "../LogSurgeonReader.hpp" #include "../Profiler.hpp" #include "../streaming_archive/writer/utils.hpp" +#include "../time_types.hpp" #include "utils.hpp" using clp::ir::eight_byte_encoded_variable_t; @@ -102,6 +103,9 @@ static void write_message_to_encoded_file( if (msg.has_ts_patt_changed()) { archive.change_ts_pattern(msg.get_ts_patt()); } + if (msg.has_utc_offset_changed()) { + archive.change_utc_offset(msg.get_utc_offset()); + } archive.write_msg(msg.get_ts(), msg.get_content(), msg.get_orig_num_bytes()); } @@ -522,6 +526,8 @@ std::error_code FileCompressor::compress_ir_stream_by_encoding( auto timestamp_pattern = log_event_deserializer.get_timestamp_pattern(); archive.change_ts_pattern(×tamp_pattern); + UtcOffset utc_offset{0}; + std::error_code error_code{}; while (true) { auto result = log_event_deserializer.deserialize_log_event(); @@ -546,6 +552,15 @@ std::error_code FileCompressor::compress_ir_stream_by_encoding( split_file(path, group_id, ×tamp_pattern, archive); } + // TODO This is inefficient and should be replaced with a callback mechanism where + // LogEventDeserializer calls archive.change_utc_offset when it finds a UTC offset change + // packet in the IR stream. + auto deserialized_utc_offset = log_event_deserializer.get_current_utc_offset(); + if (deserialized_utc_offset != utc_offset) { + utc_offset = deserialized_utc_offset; + archive.change_utc_offset(utc_offset); + } + archive.write_log_event_ir(result.value()); } diff --git a/components/core/src/clp/streaming_archive/Constants.hpp b/components/core/src/clp/streaming_archive/Constants.hpp index 0f9100f666..2157bc905a 100644 --- a/components/core/src/clp/streaming_archive/Constants.hpp +++ b/components/core/src/clp/streaming_archive/Constants.hpp @@ -36,6 +36,7 @@ constexpr char OrigFileId[] = "orig_file_id"; constexpr char Path[] = "path"; constexpr char BeginTimestamp[] = "begin_timestamp"; constexpr char EndTimestamp[] = "end_timestamp"; +constexpr char UtcOffsets[] = "utc_offsets"; constexpr char TimestampPatterns[] = "timestamp_patterns"; constexpr char NumUncompressedBytes[] = "num_uncompressed_bytes"; constexpr char NumMessages[] = "num_messages"; diff --git a/components/core/src/clp/streaming_archive/MetadataDB.cpp b/components/core/src/clp/streaming_archive/MetadataDB.cpp index 69f7dc395c..3ac9493b07 100644 --- a/components/core/src/clp/streaming_archive/MetadataDB.cpp +++ b/components/core/src/clp/streaming_archive/MetadataDB.cpp @@ -16,6 +16,7 @@ enum class FilesTableFieldIndexes : uint16_t { Path, BeginTimestamp, EndTimestamp, + UtcOffsets, TimestampPatterns, NumUncompressedBytes, NumMessages, @@ -150,6 +151,8 @@ static SQLitePreparedStatement get_files_select_statement( = streaming_archive::cMetadataDB::File::BeginTimestamp; field_names[enum_to_underlying_type(FilesTableFieldIndexes::EndTimestamp)] = streaming_archive::cMetadataDB::File::EndTimestamp; + field_names[enum_to_underlying_type(FilesTableFieldIndexes::UtcOffsets)] + = streaming_archive::cMetadataDB::File::UtcOffsets; field_names[enum_to_underlying_type(FilesTableFieldIndexes::TimestampPatterns)] = streaming_archive::cMetadataDB::File::TimestampPatterns; field_names[enum_to_underlying_type(FilesTableFieldIndexes::NumUncompressedBytes)] @@ -354,6 +357,22 @@ void MetadataDB::FileIterator::get_timestamp_patterns(string& timestamp_patterns ); } +void MetadataDB::FileIterator::get_utc_offsets( + uint64_t const*& offsets, + size_t& num_offsets +) const { + void const* value{nullptr}; + size_t value_size{0}; + m_statement.column_blob( + enum_to_underlying_type(FilesTableFieldIndexes::UtcOffsets), + value, + value_size + ); + + offsets = static_cast(value); + num_offsets = value_size / sizeof(offsets[0]); +} + size_t MetadataDB::FileIterator::get_num_uncompressed_bytes() const { return m_statement.column_int64( enum_to_underlying_type(FilesTableFieldIndexes::NumUncompressedBytes) @@ -435,6 +454,11 @@ void MetadataDB::open(string const& path) { file_field_names_and_types[enum_to_underlying_type(FilesTableFieldIndexes::EndTimestamp)].second = "INTEGER"; + file_field_names_and_types[enum_to_underlying_type(FilesTableFieldIndexes::UtcOffsets)].first + = streaming_archive::cMetadataDB::File::UtcOffsets; + file_field_names_and_types[enum_to_underlying_type(FilesTableFieldIndexes::UtcOffsets)].second + = "BLOB"; + file_field_names_and_types[enum_to_underlying_type(FilesTableFieldIndexes::TimestampPatterns)] .first = streaming_archive::cMetadataDB::File::TimestampPatterns; @@ -588,6 +612,13 @@ void MetadataDB::update_files(vector const& files) { enum_to_underlying_type(FilesTableFieldIndexes::EndTimestamp) + 1, file->get_end_ts() ); + auto encoded_utc_offsets = file->get_encoded_utc_offsets(); + m_upsert_file_statement->bind_blob64( + enum_to_underlying_type(FilesTableFieldIndexes::UtcOffsets) + 1, + encoded_utc_offsets.data(), + encoded_utc_offsets.size() * sizeof(decltype(encoded_utc_offsets)::value_type), + true + ); m_upsert_file_statement->bind_text( enum_to_underlying_type(FilesTableFieldIndexes::TimestampPatterns) + 1, file->get_encoded_timestamp_patterns(), diff --git a/components/core/src/clp/streaming_archive/MetadataDB.hpp b/components/core/src/clp/streaming_archive/MetadataDB.hpp index c0309f21ca..9be994eae1 100644 --- a/components/core/src/clp/streaming_archive/MetadataDB.hpp +++ b/components/core/src/clp/streaming_archive/MetadataDB.hpp @@ -89,6 +89,12 @@ class MetadataDB { epochtime_t get_begin_ts() const; epochtime_t get_end_ts() const; void get_timestamp_patterns(std::string& timestamp_patterns) const; + /** + * Gets the encoded UTC offsets. + * @param offsets Returns the encoded UTC offsets + * @param num_offsets Returns the number of encoded UTC offsets + */ + void get_utc_offsets(uint64_t const*& offsets, size_t& num_offsets) const; size_t get_num_uncompressed_bytes() const; size_t get_num_messages() const; size_t get_num_variables() const; diff --git a/components/core/src/clp/streaming_archive/reader/Archive.cpp b/components/core/src/clp/streaming_archive/reader/Archive.cpp index 3185fd53a5..69db3f87f1 100644 --- a/components/core/src/clp/streaming_archive/reader/Archive.cpp +++ b/components/core/src/clp/streaming_archive/reader/Archive.cpp @@ -11,6 +11,7 @@ #include "../../EncodedVariableInterpreter.hpp" #include "../../spdlog_with_specializations.hpp" #include "../../time_types.hpp" +#include "../../TimestampPattern.hpp" #include "../../Utils.hpp" #include "../ArchiveMetadata.hpp" #include "../Constants.hpp" @@ -189,28 +190,18 @@ bool Archive::decompress_message( return false; } - // Determine which timestamp pattern to use - auto const& timestamp_patterns = file.get_timestamp_patterns(); - if (!timestamp_patterns.empty() - && compressed_msg.get_message_number() - >= timestamp_patterns[file.get_current_ts_pattern_ix()].first) - { - while (true) { - if (file.get_current_ts_pattern_ix() >= timestamp_patterns.size() - 1) { - // Already at last timestamp pattern - break; - } - auto next_patt_start_message_num - = timestamp_patterns[file.get_current_ts_pattern_ix() + 1].first; - if (compressed_msg.get_message_number() < next_patt_start_message_num) { - // Not yet time for next timestamp pattern - break; - } - file.increment_current_ts_pattern_ix(); - } - timestamp_patterns[file.get_current_ts_pattern_ix()].second.insert_formatted_timestamp( + TimestampPattern const* timestamp_pattern{nullptr}; + UtcOffset utc_offset{0}; + file.get_timestamp_pattern_and_utc_offset( + compressed_msg.get_message_number(), + timestamp_pattern, + utc_offset + ); + + if (nullptr != timestamp_pattern) { + timestamp_pattern->insert_formatted_timestamp( compressed_msg.get_ts_in_milli(), - UtcOffset{0}, + utc_offset, decompressed_msg ); } diff --git a/components/core/src/clp/streaming_archive/reader/File.cpp b/components/core/src/clp/streaming_archive/reader/File.cpp index 232170fc6d..9ad45a63d5 100644 --- a/components/core/src/clp/streaming_archive/reader/File.cpp +++ b/components/core/src/clp/streaming_archive/reader/File.cpp @@ -5,6 +5,7 @@ #include "../../EncodedVariableInterpreter.hpp" #include "../../spdlog_with_specializations.hpp" +#include "../../time_types.hpp" #include "../Constants.hpp" #include "SegmentManager.hpp" @@ -33,6 +34,22 @@ ErrorCode File::open_me( m_begin_ts = file_metadata_ix.get_begin_ts(); m_end_ts = file_metadata_ix.get_end_ts(); + uint64_t const* encoded_utc_offsets{nullptr}; + size_t num_utc_offsets{0}; + file_metadata_ix.get_utc_offsets(encoded_utc_offsets, num_utc_offsets); + if (num_utc_offsets % 2 != 0) { + throw OperationFailed(ErrorCode_Corrupt, __FILENAME__, __LINE__); + } + for (size_t i = 0; i < num_utc_offsets; i += 2) { + uint64_t msg_num = encoded_utc_offsets[i]; + uint64_t encoded_offset = encoded_utc_offsets[i + 1]; + m_utc_offsets.emplace_back( + std::piecewise_construct, + std::forward_as_tuple(msg_num), + std::forward_as_tuple(encoded_offset) + ); + } + string encoded_timestamp_patterns; file_metadata_ix.get_timestamp_patterns(encoded_timestamp_patterns); size_t begin_pos = 0; @@ -142,6 +159,7 @@ ErrorCode File::open_me( m_msgs_ix = 0; m_variables_ix = 0; + m_current_utc_offset_ix = 0; m_current_ts_pattern_ix = 0; m_current_ts_in_milli = m_begin_ts; @@ -162,8 +180,10 @@ void File::close_me() { m_variables_ix = 0; m_num_variables = 0; + m_current_utc_offset_ix = 0; m_current_ts_pattern_ix = 0; m_current_ts_in_milli = 0; + m_utc_offsets.clear(); m_timestamp_patterns.clear(); m_begin_ts = cEpochTimeMax; @@ -190,12 +210,47 @@ epochtime_t File::get_current_ts_in_milli() const { return m_current_ts_in_milli; } -size_t File::get_current_ts_pattern_ix() const { - return m_current_ts_pattern_ix; -} +void File::get_timestamp_pattern_and_utc_offset( + uint64_t msg_num, + TimestampPattern const*& pattern, + UtcOffset& offset +) { + pattern = nullptr; + if (false == m_timestamp_patterns.empty() + && msg_num >= m_timestamp_patterns[m_current_ts_pattern_ix].first) + { + while (true) { + if (m_current_ts_pattern_ix >= m_timestamp_patterns.size() - 1) { + // Already at last timestamp pattern + break; + } + auto next_pattern_begin_msg_num + = m_timestamp_patterns[m_current_ts_pattern_ix + 1].first; + if (msg_num < next_pattern_begin_msg_num) { + // Not yet time for next timestamp pattern + break; + } + ++m_current_ts_pattern_ix; + } + pattern = &(m_timestamp_patterns[m_current_ts_pattern_ix].second); + } -void File::increment_current_ts_pattern_ix() { - ++m_current_ts_pattern_ix; + offset = UtcOffset{0}; + if (false == m_utc_offsets.empty() && msg_num >= m_utc_offsets[m_current_utc_offset_ix].first) { + while (true) { + if (m_current_utc_offset_ix >= m_utc_offsets.size() - 1) { + // Already at last UTC offset + break; + } + auto next_utc_offset_begin_msg_num = m_utc_offsets[m_current_utc_offset_ix + 1].first; + if (msg_num < next_utc_offset_begin_msg_num) { + // Not yet time for next UTC offset + break; + } + ++m_current_utc_offset_ix; + } + offset = m_utc_offsets[m_current_utc_offset_ix].second; + } } bool File::find_message_in_time_range( diff --git a/components/core/src/clp/streaming_archive/reader/File.hpp b/components/core/src/clp/streaming_archive/reader/File.hpp index 8f162d3a58..6bfb2a4ba9 100644 --- a/components/core/src/clp/streaming_archive/reader/File.hpp +++ b/components/core/src/clp/streaming_archive/reader/File.hpp @@ -47,6 +47,7 @@ class File { m_logtypes(nullptr), m_timestamps(nullptr), m_variables(nullptr), + m_current_utc_offset_ix(0), m_current_ts_pattern_ix(0), m_current_ts_in_milli(0) {} @@ -91,11 +92,27 @@ class File { */ void reset_indices(); + std::vector> const& get_utc_offsets() const { + return m_utc_offsets; + } + std::vector> const& get_timestamp_patterns() const; epochtime_t get_current_ts_in_milli() const; - size_t get_current_ts_pattern_ix() const; - void increment_current_ts_pattern_ix(); + /** + * Gets the timestamp pattern and UTC offset corresponding to the message with the given index. + * NOTE: This method assumes that the caller will only provide monotonically increasing message + * indices until the file's indices have been reset. + * @param msg_num + * @param pattern Returns the corresponding timestamp pattern. The pointer is valid as long as + * the file is open. + * @param offset Returns the corresponding UTC offset + */ + void get_timestamp_pattern_and_utc_offset( + uint64_t msg_num, + TimestampPattern const*& pattern, + UtcOffset& offset + ); /** * Finds message that falls in given time range @@ -129,6 +146,7 @@ class File { epochtime_t m_begin_ts; epochtime_t m_end_ts; + std::vector> m_utc_offsets; std::vector> m_timestamp_patterns; std::string m_id_as_string; std::string m_orig_file_id_as_string; @@ -153,6 +171,7 @@ class File { epochtime_t* m_timestamps; encoded_variable_t* m_variables; + size_t m_current_utc_offset_ix; size_t m_current_ts_pattern_ix; epochtime_t m_current_ts_in_milli; diff --git a/components/core/src/clp/streaming_archive/writer/Archive.cpp b/components/core/src/clp/streaming_archive/writer/Archive.cpp index 386d1b052f..087c0387de 100644 --- a/components/core/src/clp/streaming_archive/writer/Archive.cpp +++ b/components/core/src/clp/streaming_archive/writer/Archive.cpp @@ -282,6 +282,13 @@ void Archive::set_file_is_split(bool is_split) { m_file->set_is_split(is_split); } +void Archive::change_utc_offset(UtcOffset utc_offset) { + if (nullptr == m_file) { + throw OperationFailed(ErrorCode_Unsupported, __FILENAME__, __LINE__); + } + m_file->change_utc_offset(utc_offset); +} + void Archive::change_ts_pattern(TimestampPattern const* pattern) { if (m_file == nullptr) { throw OperationFailed(ErrorCode_Unsupported, __FILENAME__, __LINE__); @@ -317,6 +324,7 @@ void Archive::write_msg_using_schema(LogEventView const& log_view) { UtcOffset utc_offset{0}; TimestampPattern* timestamp_pattern = nullptr; auto const& log_output_buffer = log_view.get_log_output_buffer(); + // TODO Handle parsing and setting UTC offset if (log_output_buffer->has_timestamp()) { size_t start; size_t end; diff --git a/components/core/src/clp/streaming_archive/writer/Archive.hpp b/components/core/src/clp/streaming_archive/writer/Archive.hpp index 2ef946c9e4..f48c4478f6 100644 --- a/components/core/src/clp/streaming_archive/writer/Archive.hpp +++ b/components/core/src/clp/streaming_archive/writer/Archive.hpp @@ -19,6 +19,7 @@ #include "../../GlobalMetadataDB.hpp" #include "../../ir/LogEvent.hpp" #include "../../LogTypeDictionaryWriter.hpp" +#include "../../time_types.hpp" #include "../../VariableDictionaryWriter.hpp" #include "../ArchiveMetadata.hpp" #include "../MetadataDB.hpp" @@ -128,11 +129,17 @@ class Archive { */ void set_file_is_split(bool is_split); + /** + * Wrapper for streaming_archive::writer::File::change_utc_offset. + * @param utc_offset + */ + void change_utc_offset(UtcOffset utc_offset); /** * Wrapper for streaming_archive::writer::File::change_ts_pattern * @param pattern */ void change_ts_pattern(TimestampPattern const* pattern); + /** * Encodes and writes a message to the current encoded file * @param timestamp diff --git a/components/core/src/clp/streaming_archive/writer/File.cpp b/components/core/src/clp/streaming_archive/writer/File.cpp index b0e627ac66..02438a008b 100644 --- a/components/core/src/clp/streaming_archive/writer/File.cpp +++ b/components/core/src/clp/streaming_archive/writer/File.cpp @@ -1,6 +1,7 @@ #include "File.hpp" #include "../../EncodedVariableInterpreter.hpp" +#include "../../time_types.hpp" using std::string; using std::to_string; @@ -84,6 +85,11 @@ void File::write_encoded_msg( m_is_metadata_clean = false; } +void File::change_utc_offset(UtcOffset utc_offset) { + m_utc_offsets.emplace_back(m_num_messages, utc_offset); + m_is_metadata_clean = false; +} + void File::change_ts_pattern(TimestampPattern const* pattern) { if (nullptr == pattern) { m_timestamp_patterns.emplace_back(m_num_messages, TimestampPattern()); @@ -109,6 +115,25 @@ void File::mark_metadata_as_clean() { m_is_metadata_clean = true; } +UtcOffset File::get_current_utc_offset() const { + if (m_utc_offsets.empty()) { + return UtcOffset{0}; + } + return m_utc_offsets.back().second; +} + +vector File::get_encoded_utc_offsets() const { + vector encoded_offsets; + + // TODO We could build this procedurally + for (auto const& pair : m_utc_offsets) { + encoded_offsets.emplace_back(pair.first); + encoded_offsets.emplace_back(pair.second.count()); + } + + return encoded_offsets; +} + string File::get_encoded_timestamp_patterns() const { string encoded_timestamp_patterns; string encoded_timestamp_pattern; diff --git a/components/core/src/clp/streaming_archive/writer/File.hpp b/components/core/src/clp/streaming_archive/writer/File.hpp index 8abbf70aca..c7006d292d 100644 --- a/components/core/src/clp/streaming_archive/writer/File.hpp +++ b/components/core/src/clp/streaming_archive/writer/File.hpp @@ -92,6 +92,12 @@ class File { size_t num_uncompressed_bytes ); + /** + * Changes the UTC offset in use at the current message in the file. + * @param utc_offset + */ + void change_utc_offset(UtcOffset utc_offset); + /** * Changes timestamp pattern in use at current message in file * @param pattern @@ -171,6 +177,10 @@ class File { return m_timestamp_patterns; } + UtcOffset get_current_utc_offset() const; + + std::vector get_encoded_utc_offsets() const; + std::string get_encoded_timestamp_patterns() const; uint64_t get_num_messages() const { return m_num_messages; } @@ -223,6 +233,7 @@ class File { epochtime_t m_begin_ts; epochtime_t m_end_ts; + std::vector> m_utc_offsets; std::vector> m_timestamp_patterns; group_id_t m_group_id; diff --git a/components/core/src/clp/streaming_archive/writer/utils.cpp b/components/core/src/clp/streaming_archive/writer/utils.cpp index c35a72692f..9fba37fdee 100644 --- a/components/core/src/clp/streaming_archive/writer/utils.cpp +++ b/components/core/src/clp/streaming_archive/writer/utils.cpp @@ -25,6 +25,7 @@ auto split_file( Archive& archive_writer ) -> void { auto const& encoded_file = archive_writer.get_file(); + auto last_utc_offset = encoded_file.get_current_utc_offset(); auto has_ts_pattern = encoded_file.has_ts_pattern(); auto orig_file_id = encoded_file.get_orig_file_id(); auto split_ix = encoded_file.get_split_ix(); @@ -36,6 +37,9 @@ auto split_file( // Initialize the file's timestamp pattern to the previous split's pattern archive_writer.change_ts_pattern(last_timestamp_pattern); } + if (last_utc_offset.count() != 0) { + archive_writer.change_utc_offset(last_utc_offset); + } } auto split_file_and_archive( @@ -46,6 +50,7 @@ auto split_file_and_archive( Archive& archive_writer ) -> void { auto const& encoded_file = archive_writer.get_file(); + auto last_utc_offset = encoded_file.get_current_utc_offset(); auto has_ts_pattern = encoded_file.has_ts_pattern(); auto orig_file_id = encoded_file.get_orig_file_id(); auto split_ix = encoded_file.get_split_ix(); @@ -59,6 +64,9 @@ auto split_file_and_archive( // Initialize the file's timestamp pattern to the previous split's pattern archive_writer.change_ts_pattern(last_timestamp_pattern); } + if (last_utc_offset.count() != 0) { + archive_writer.change_utc_offset(last_utc_offset); + } } auto close_file_and_append_to_segment(Archive& archive_writer) -> void { diff --git a/components/core/tests/test-TimestampPattern.cpp b/components/core/tests/test-TimestampPattern.cpp index e0609314da..adf1b8f1cd 100644 --- a/components/core/tests/test-TimestampPattern.cpp +++ b/components/core/tests/test-TimestampPattern.cpp @@ -209,6 +209,24 @@ void validate_timestamp_parsing_result( TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { TimestampPattern::init(); + search_and_validate_timestamp_pattern( + "2015-02-01T01:02:03.004-0500 content after", + {0, "%Y-%m-%dT%H:%M:%S.%3%z"}, + 1'422'752'523'004, + UtcOffset{std::chrono::seconds{-5 * 60 * 60}}, + 0, + 28 + ); + + search_and_validate_timestamp_pattern( + "2015-02-01T01:02:03,004+0500 content after", + {0, "%Y-%m-%dT%H:%M:%S,%3%z"}, + 1'422'752'523'004, + UtcOffset{std::chrono::seconds{5 * 60 * 60}}, + 0, + 28 + ); + search_and_validate_timestamp_pattern( "2015-02-01T01:02:03.004 content after", {0, "%Y-%m-%dT%H:%M:%S.%3"}, From e419d0a7418fd167d8a68e25f0a70ad21bb76707 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Sun, 31 Mar 2024 00:26:39 -0400 Subject: [PATCH 08/11] Add UTC offset to default LogEventDeserializer TimestampPattern. --- components/core/src/clp/ir/LogEventDeserializer.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/core/src/clp/ir/LogEventDeserializer.hpp b/components/core/src/clp/ir/LogEventDeserializer.hpp index b2e9463e73..8a789a34e7 100644 --- a/components/core/src/clp/ir/LogEventDeserializer.hpp +++ b/components/core/src/clp/ir/LogEventDeserializer.hpp @@ -73,7 +73,7 @@ class LogEventDeserializer { m_prev_msg_timestamp{ref_timestamp} {} // Variables - TimestampPattern m_timestamp_pattern{0, "%Y-%m-%dT%H:%M:%S.%3"}; + TimestampPattern m_timestamp_pattern{0, "%Y-%m-%dT%H:%M:%S.%3%z"}; UtcOffset m_utc_offset{0}; [[no_unique_address]] std::conditional_t< std::is_same_v, From 490368d5c880dfbe0434e373be2fa877a075f278 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Sun, 31 Mar 2024 00:27:38 -0400 Subject: [PATCH 09/11] Update IR stream version number. --- components/core/src/clp/ffi/ir_stream/protocol_constants.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/core/src/clp/ffi/ir_stream/protocol_constants.hpp b/components/core/src/clp/ffi/ir_stream/protocol_constants.hpp index 7d777ac5d8..1e359ca4f2 100644 --- a/components/core/src/clp/ffi/ir_stream/protocol_constants.hpp +++ b/components/core/src/clp/ffi/ir_stream/protocol_constants.hpp @@ -12,7 +12,7 @@ constexpr int8_t LengthUByte = 0x11; constexpr int8_t LengthUShort = 0x12; constexpr char VersionKey[] = "VERSION"; -constexpr char VersionValue[] = "0.0.1"; +constexpr char VersionValue[] = "0.0.2"; // The following regex can be used to validate a Semantic Versioning string. The source of the // regex can be found here: https://semver.org/ From 3638f5607d6e51d076337aeb4f716e52f8ac2895 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Sun, 31 Mar 2024 12:57:32 -0400 Subject: [PATCH 10/11] Bug-fix: Apply UTC offset before storing and restoring timestamp. --- components/core/src/clp/TimestampPattern.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/components/core/src/clp/TimestampPattern.cpp b/components/core/src/clp/TimestampPattern.cpp index 7f8da0977c..2610318abc 100644 --- a/components/core/src/clp/TimestampPattern.cpp +++ b/components/core/src/clp/TimestampPattern.cpp @@ -278,6 +278,7 @@ bool TimestampPattern::parse_timestamp( long microsecond = 0; long nanosecond = 0; bool is_pm = false; + utc_offset = UtcOffset{0}; size_t const format_length = m_format.length(); size_t format_ix = 0; @@ -804,6 +805,7 @@ bool TimestampPattern::parse_timestamp( auto unix_epoch_point = date::sys_days(date::year(1970) / 1 / 1); // Get timestamp since epoch auto duration_since_epoch = timestamp_point - unix_epoch_point; + duration_since_epoch -= utc_offset; // Convert to raw milliseconds timestamp = std::chrono::duration_cast(duration_since_epoch).count(); @@ -849,8 +851,8 @@ void TimestampPattern::insert_formatted_timestamp( new_msg.assign(msg, 0, ts_begin_ix); // Separate parts of timestamp - auto timestamp_point - = date::sys_days(date::year(1970) / 1 / 1) + std::chrono::milliseconds(timestamp); + auto timestamp_point = date::sys_days(date::year(1970) / 1 / 1) + + std::chrono::milliseconds(timestamp) + utc_offset; auto timestamp_date = date::floor(timestamp_point); int day_of_week_ix = (date::year_month_weekday(timestamp_date).weekday_indexed().weekday() - date::Sunday) From 2250201ec8d59d4b5a68e7decde528138abe82fd Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Sun, 31 Mar 2024 13:12:29 -0400 Subject: [PATCH 11/11] Apply bug-fix to unit tests. --- components/core/tests/test-TimestampPattern.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/core/tests/test-TimestampPattern.cpp b/components/core/tests/test-TimestampPattern.cpp index adf1b8f1cd..b93590311d 100644 --- a/components/core/tests/test-TimestampPattern.cpp +++ b/components/core/tests/test-TimestampPattern.cpp @@ -212,7 +212,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { search_and_validate_timestamp_pattern( "2015-02-01T01:02:03.004-0500 content after", {0, "%Y-%m-%dT%H:%M:%S.%3%z"}, - 1'422'752'523'004, + 1'422'770'523'004, UtcOffset{std::chrono::seconds{-5 * 60 * 60}}, 0, 28 @@ -221,7 +221,7 @@ TEST_CASE("Test known timestamp patterns", "[KnownTimestampPatterns]") { search_and_validate_timestamp_pattern( "2015-02-01T01:02:03,004+0500 content after", {0, "%Y-%m-%dT%H:%M:%S,%3%z"}, - 1'422'752'523'004, + 1'422'734'523'004, UtcOffset{std::chrono::seconds{5 * 60 * 60}}, 0, 28