@@ -853,3 +853,150 @@ TEST_CASE("multi_line_with_delimited_vars", "[BufferParser]") {
853853
854854 parse_and_validate (buffer_parser, cInput, {expected_event1, expected_event2});
855855}
856+
857+ /* *
858+ * @ingroup test_buffer_parser_capture
859+ * @brief Tests a multi-capture rule parsing an Android log.
860+ *
861+ * This test verifies that a multi-capture rule correctly identifies the location of each capture
862+ * group. It tests that `BufferParser` correctly flattens the logtype, as well as stores the full
863+ * tree correctly.
864+ *
865+ * ### Schema Definition
866+ * @code
867+ * delimiters: \n\r\[:,
868+ * header:(?<timestamp>\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}) (?<PID>\d{4}) (?<TID>\d{4}) \
869+ * (?<LogLevel>I|D|E|W)
870+ * @endcode
871+ *
872+ * ### Input Example
873+ * @code
874+ * "1999-12-12T01:02:03.456 1234 5678 I MyService A=TEXT B=1.1"
875+ * @endcode
876+ *
877+ * ### Expected Logtype
878+ * @code
879+ * "<timestamp> <PID> <TID> <LogLevel> MyService A=TEXT B=1.1"
880+ * @endcode
881+ *
882+ * ### Expected Tokenization
883+ * @code
884+ * "1999-12-12T01:02:03.456 1234 5678 I" -> "header"
885+ * " MyService" -> uncaught string
886+ * " A=TEXT" -> uncaught string
887+ * " B=1.1" -> uncaught string
888+ * @endcode
889+ */
890+ TEST_CASE (" multi_capture_one" , " [BufferParser]" ) {
891+ constexpr string_view cDelimitersSchema{R"( delimiters: \n\r\[:,)" };
892+ constexpr string_view cTime{R"( (?<timestamp>\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}))" };
893+ constexpr string_view cPid{R"( (?<PID>\d{4}))" };
894+ constexpr string_view cTid{R"( (?<TID>\d{4}))" };
895+ constexpr string_view cLogLevel{R"( (?<LogLevel>I|D|E|W))" };
896+ constexpr string_view cInput{" 1999-12-12T01:02:03.456 1234 5678 I MyService A=TEXT B=1.1" };
897+
898+ string const header_rule{fmt::format (" header:{} {} {} {}" , cTime, cPid, cTid, cLogLevel)};
899+ ExpectedEvent const expected_event{
900+ .m_logtype {" <timestamp> <PID> <TID> <LogLevel> MyService A=TEXT B=1.1" },
901+ .m_timestamp_raw {" " },
902+ .m_tokens {
903+ {{" 1999-12-12T01:02:03.456 1234 5678 I" ,
904+ " header" ,
905+ {{{" timestamp" , {{0 }, {23 }}},
906+ {" PID" , {{24 }, {28 }}},
907+ {" TID" , {{29 }, {33 }}},
908+ {" LogLevel" , {{34 }, {35 }}}}}},
909+ {" MyService" , " " , {}},
910+ {" A=TEXT" , " " , {}},
911+ {" B=1.1" , " " , {}}}
912+ }
913+ };
914+
915+ Schema schema;
916+ schema.add_delimiters (cDelimitersSchema);
917+ schema.add_variable (header_rule, -1 );
918+ BufferParser buffer_parser{std::move (schema.release_schema_ast_ptr ())};
919+
920+ parse_and_validate (buffer_parser, cInput, {expected_event});
921+ }
922+
923+ /* *
924+ * @ingroup test_buffer_parser_capture
925+ * @brief Tests a multi-capture rule parsing a Kubernetes log.
926+ *
927+ * This test also verifies that a multi-capture rule correctly identifies the location of each
928+ * capture group. It tests that `BufferParser` correctly flattens the logtype, as well as stores the
929+ * full tree correctly.
930+ *
931+ * ### Schema Definition
932+ * @code
933+ * delimiters: \n\r\[:,
934+ * header:(?<timestamp>[A-Za-z]{3} \d{2} \d{2}:\d{2}:\d{2}) ip\-(?<IP>\d{3}\-\d{2}\-\d{2}\-\d{2}) \
935+ * ku\[(?<PID>\d{4})\]: (?<LogLevel>I|D|E|W)(?<LID>\d{4}) \
936+ * (?<LTime>\d{2}:\d{2}:\d{2}\.\d{4}) (?<TID>\d{4})
937+ * @endcode
938+ *
939+ * ### Input Example
940+ * @code
941+ * "Jan 01 02:03:04 ip-999-99-99-99 ku[1234]: E5678 02:03:04.5678 1111 Y failed"
942+ * @endcode
943+ *
944+ * ### Expected Logtype
945+ * @code
946+ * "<timestamp> ip-<IP> ku[<PID>]: <LogLevel><LID> <LTime> <TID> Y failed"
947+ * @endcode
948+ *
949+ * ### Expected Tokenization
950+ * @code
951+ * "Jan 01 02:03:04 ip-999-99-99-99 ku[1234]: E5678 02:03:04.5678 1111" -> "header"
952+ * " Y" -> uncaught string
953+ * " failed" -> uncaught string
954+ * @endcode
955+ */
956+ TEST_CASE (" multi_capture_two" , " [BufferParser]" ) {
957+ constexpr string_view cDelimitersSchema{R"( delimiters: \n\r\[:,)" };
958+ constexpr string_view cTime{R"( (?<timestamp>[A-Za-z]{3} \d{2} \d{2}:\d{2}:\d{2}))" };
959+ constexpr string_view cIp{R"( (?<IP>\d{3}\-\d{2}\-\d{2}\-\d{2}))" };
960+ constexpr string_view cPid{R"( (?<PID>\d{4}))" };
961+ constexpr string_view cLogLevel{R"( (?<LogLevel>I|D|E|W))" };
962+ constexpr string_view cLid{R"( (?<LID>\d{4}))" };
963+ constexpr string_view cLTime{R"( (?<LTime>\d{2}:\d{2}:\d{2}\.\d{4}))" };
964+ constexpr string_view cTid{R"( (?<TID>\d{4}))" };
965+ constexpr string_view cInput{" Jan 01 02:03:04 ip-999-99-99-99 ku[1234]: E5678 02:03:04.5678"
966+ " 1111 Y failed" };
967+
968+ string const header_rule{fmt::format (
969+ R"( header:{} ip\-{} ku\[{}\]: {}{} {} {})" ,
970+ cTime,
971+ cIp,
972+ cPid,
973+ cLogLevel,
974+ cLid,
975+ cLTime,
976+ cTid
977+ )};
978+ ExpectedEvent const expected_event{
979+ .m_logtype {" <timestamp> ip-<IP> ku[<PID>]: <LogLevel><LID> <LTime> <TID> Y failed" },
980+ .m_timestamp_raw {" " },
981+ .m_tokens {
982+ {{" Jan 01 02:03:04 ip-999-99-99-99 ku[1234]: E5678 02:03:04.5678 1111" ,
983+ " header" ,
984+ {{{" timestamp" , {{0 }, {15 }}},
985+ {" IP" , {{19 }, {31 }}},
986+ {" PID" , {{35 }, {39 }}},
987+ {" LogLevel" , {{42 }, {43 }}},
988+ {" LID" , {{43 }, {47 }}},
989+ {" LTime" , {{48 }, {61 }}},
990+ {" TID" , {{65 }, {69 }}}}}},
991+ {" Y" , " " , {}},
992+ {" failed" , " " , {}}}
993+ }
994+ };
995+
996+ Schema schema;
997+ schema.add_delimiters (cDelimitersSchema);
998+ schema.add_variable (header_rule, -1 );
999+ BufferParser buffer_parser{std::move (schema.release_schema_ast_ptr ())};
1000+
1001+ parse_and_validate (buffer_parser, cInput, {expected_event});
1002+ }
0 commit comments