Skip to content

Commit 9e37a5d

Browse files
test: Add unit tests for multi-capture schema rules necessary in Android and Kubernetes logs. (#151)
Co-authored-by: SharafMohamed <[email protected]> Co-authored-by: davidlion <[email protected]>
1 parent 831a9f9 commit 9e37a5d

File tree

1 file changed

+147
-0
lines changed

1 file changed

+147
-0
lines changed

tests/test-buffer-parser.cpp

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -853,3 +853,150 @@ TEST_CASE("multi_line_with_delimited_vars", "[BufferParser]") {
853853

854854
parse_and_validate(buffer_parser, cInput, {expected_event1, expected_event2});
855855
}
856+
857+
/**
858+
* @ingroup test_buffer_parser_capture
859+
* @brief Tests a multi-capture rule parsing an Android log.
860+
*
861+
* This test verifies that a multi-capture rule correctly identifies the location of each capture
862+
* group. It tests that `BufferParser` correctly flattens the logtype, as well as stores the full
863+
* tree correctly.
864+
*
865+
* ### Schema Definition
866+
* @code
867+
* delimiters: \n\r\[:,
868+
* header:(?<timestamp>\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}) (?<PID>\d{4}) (?<TID>\d{4}) \
869+
* (?<LogLevel>I|D|E|W)
870+
* @endcode
871+
*
872+
* ### Input Example
873+
* @code
874+
* "1999-12-12T01:02:03.456 1234 5678 I MyService A=TEXT B=1.1"
875+
* @endcode
876+
*
877+
* ### Expected Logtype
878+
* @code
879+
* "<timestamp> <PID> <TID> <LogLevel> MyService A=TEXT B=1.1"
880+
* @endcode
881+
*
882+
* ### Expected Tokenization
883+
* @code
884+
* "1999-12-12T01:02:03.456 1234 5678 I" -> "header"
885+
* " MyService" -> uncaught string
886+
* " A=TEXT" -> uncaught string
887+
* " B=1.1" -> uncaught string
888+
* @endcode
889+
*/
890+
TEST_CASE("multi_capture_one", "[BufferParser]") {
891+
constexpr string_view cDelimitersSchema{R"(delimiters: \n\r\[:,)"};
892+
constexpr string_view cTime{R"((?<timestamp>\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}))"};
893+
constexpr string_view cPid{R"((?<PID>\d{4}))"};
894+
constexpr string_view cTid{R"((?<TID>\d{4}))"};
895+
constexpr string_view cLogLevel{R"((?<LogLevel>I|D|E|W))"};
896+
constexpr string_view cInput{"1999-12-12T01:02:03.456 1234 5678 I MyService A=TEXT B=1.1"};
897+
898+
string const header_rule{fmt::format("header:{} {} {} {}", cTime, cPid, cTid, cLogLevel)};
899+
ExpectedEvent const expected_event{
900+
.m_logtype{"<timestamp> <PID> <TID> <LogLevel> MyService A=TEXT B=1.1"},
901+
.m_timestamp_raw{""},
902+
.m_tokens{
903+
{{"1999-12-12T01:02:03.456 1234 5678 I",
904+
"header",
905+
{{{"timestamp", {{0}, {23}}},
906+
{"PID", {{24}, {28}}},
907+
{"TID", {{29}, {33}}},
908+
{"LogLevel", {{34}, {35}}}}}},
909+
{" MyService", "", {}},
910+
{" A=TEXT", "", {}},
911+
{" B=1.1", "", {}}}
912+
}
913+
};
914+
915+
Schema schema;
916+
schema.add_delimiters(cDelimitersSchema);
917+
schema.add_variable(header_rule, -1);
918+
BufferParser buffer_parser{std::move(schema.release_schema_ast_ptr())};
919+
920+
parse_and_validate(buffer_parser, cInput, {expected_event});
921+
}
922+
923+
/**
924+
* @ingroup test_buffer_parser_capture
925+
* @brief Tests a multi-capture rule parsing a Kubernetes log.
926+
*
927+
* This test also verifies that a multi-capture rule correctly identifies the location of each
928+
* capture group. It tests that `BufferParser` correctly flattens the logtype, as well as stores the
929+
* full tree correctly.
930+
*
931+
* ### Schema Definition
932+
* @code
933+
* delimiters: \n\r\[:,
934+
* header:(?<timestamp>[A-Za-z]{3} \d{2} \d{2}:\d{2}:\d{2}) ip\-(?<IP>\d{3}\-\d{2}\-\d{2}\-\d{2}) \
935+
* ku\[(?<PID>\d{4})\]: (?<LogLevel>I|D|E|W)(?<LID>\d{4}) \
936+
* (?<LTime>\d{2}:\d{2}:\d{2}\.\d{4}) (?<TID>\d{4})
937+
* @endcode
938+
*
939+
* ### Input Example
940+
* @code
941+
* "Jan 01 02:03:04 ip-999-99-99-99 ku[1234]: E5678 02:03:04.5678 1111 Y failed"
942+
* @endcode
943+
*
944+
* ### Expected Logtype
945+
* @code
946+
* "<timestamp> ip-<IP> ku[<PID>]: <LogLevel><LID> <LTime> <TID> Y failed"
947+
* @endcode
948+
*
949+
* ### Expected Tokenization
950+
* @code
951+
* "Jan 01 02:03:04 ip-999-99-99-99 ku[1234]: E5678 02:03:04.5678 1111" -> "header"
952+
* " Y" -> uncaught string
953+
* " failed" -> uncaught string
954+
* @endcode
955+
*/
956+
TEST_CASE("multi_capture_two", "[BufferParser]") {
957+
constexpr string_view cDelimitersSchema{R"(delimiters: \n\r\[:,)"};
958+
constexpr string_view cTime{R"((?<timestamp>[A-Za-z]{3} \d{2} \d{2}:\d{2}:\d{2}))"};
959+
constexpr string_view cIp{R"((?<IP>\d{3}\-\d{2}\-\d{2}\-\d{2}))"};
960+
constexpr string_view cPid{R"((?<PID>\d{4}))"};
961+
constexpr string_view cLogLevel{R"((?<LogLevel>I|D|E|W))"};
962+
constexpr string_view cLid{R"((?<LID>\d{4}))"};
963+
constexpr string_view cLTime{R"((?<LTime>\d{2}:\d{2}:\d{2}\.\d{4}))"};
964+
constexpr string_view cTid{R"((?<TID>\d{4}))"};
965+
constexpr string_view cInput{"Jan 01 02:03:04 ip-999-99-99-99 ku[1234]: E5678 02:03:04.5678"
966+
" 1111 Y failed"};
967+
968+
string const header_rule{fmt::format(
969+
R"(header:{} ip\-{} ku\[{}\]: {}{} {} {})",
970+
cTime,
971+
cIp,
972+
cPid,
973+
cLogLevel,
974+
cLid,
975+
cLTime,
976+
cTid
977+
)};
978+
ExpectedEvent const expected_event{
979+
.m_logtype{"<timestamp> ip-<IP> ku[<PID>]: <LogLevel><LID> <LTime> <TID> Y failed"},
980+
.m_timestamp_raw{""},
981+
.m_tokens{
982+
{{"Jan 01 02:03:04 ip-999-99-99-99 ku[1234]: E5678 02:03:04.5678 1111",
983+
"header",
984+
{{{"timestamp", {{0}, {15}}},
985+
{"IP", {{19}, {31}}},
986+
{"PID", {{35}, {39}}},
987+
{"LogLevel", {{42}, {43}}},
988+
{"LID", {{43}, {47}}},
989+
{"LTime", {{48}, {61}}},
990+
{"TID", {{65}, {69}}}}}},
991+
{" Y", "", {}},
992+
{" failed", "", {}}}
993+
}
994+
};
995+
996+
Schema schema;
997+
schema.add_delimiters(cDelimitersSchema);
998+
schema.add_variable(header_rule, -1);
999+
BufferParser buffer_parser{std::move(schema.release_schema_ast_ptr())};
1000+
1001+
parse_and_validate(buffer_parser, cInput, {expected_event});
1002+
}

0 commit comments

Comments
 (0)