DOC: cymru cap parser: document parsing of new format

Sebastian Wagner · Sebastian Wagner · commit 156186ddb622 · 2021-03-12T14:05:12.000+01:00
diff --git a/intelmq/bots/parsers/cymru/parser_cap_program.py b/intelmq/bots/parsers/cymru/parser_cap_program.py
@@ -228,6 +228,24 @@ def parse_line_old(self, line, report):
         yield event
 
     def parse_line_new(self, line, report):
+        """
+        The format is the following:
+        category|address|asn|timestamp|optional_information|asninfo
+        Therefore very similar to CSV, just with the pipe as separator
+        category: the type (resulting in classification.*) and optional_information needs to be parsed differently per category
+        address: source.ip
+        asn: source.asn
+        timestamp: time.source
+        optional_information: needs special care.
+            For some categories it needs parsing, as it contains a mapping of keys to values, where the meaning of the keys can differ between the categories.
+            For categories in MAPING_COMMENT, this field only contains one value.
+            For the category 'bruteforce' *both* situations apply.
+            Previously, the bruteforce events only had the protocol in the comment,
+            while most other categories had a mapping. Now, the bruteforce category also uses
+            the type-value syntax. So we need to support both formats, the old and the new.
+            See also https://github.com/certtools/intelmq/issues/1794
+        asninfo: source.as_name
+        """
         category, ip, asn, timestamp, notes, asninfo = line.split('|')
 
         # to detect bogous lines like 'hostname: sub.example.comport: 80'