3030from datetime import datetime , timedelta
3131from typing import Any , List , Optional , Union
3232
33- import psutil
34-
3533import intelmq .lib .message as libmessage
3634from intelmq import (DEFAULT_LOGGING_PATH ,
3735 HARMONIZATION_CONF_FILE ,
@@ -942,7 +940,7 @@ class ParserBot(Bot):
942940 _csv_params = {}
943941 _ignore_lines_starting = []
944942 _handle = None
945- _current_line = None
943+ _current_line : Optional [ str ] = None
946944
947945 def __init__ (self , bot_id : str , start : bool = False , sighup_event = None ,
948946 disable_multithreading : bool = None ):
@@ -956,6 +954,7 @@ def __init__(self, bot_id: str, start: bool = False, sighup_event=None,
956954 def parse_csv (self , report : libmessage .Report ):
957955 """
958956 A basic CSV parser.
957+ The resulting lines are lists.
959958 """
960959 raw_report : str = utils .base64_decode (report .get ("raw" )).strip ()
961960 raw_report = raw_report .translate ({0 : None })
@@ -971,6 +970,7 @@ def parse_csv(self, report: libmessage.Report):
971970 def parse_csv_dict (self , report : libmessage .Report ):
972971 """
973972 A basic CSV Dictionary parser.
973+ The resulting lines are dictionaries with the column names as keys.
974974 """
975975 raw_report : str = utils .base64_decode (report .get ("raw" )).strip ()
976976 raw_report : str = raw_report .translate ({0 : None })
@@ -1024,6 +1024,7 @@ def parse(self, report: libmessage.Report):
10241024 for line in utils .base64_decode (report .get ("raw" )).splitlines ():
10251025 line = line .strip ()
10261026 if not any ([line .startswith (prefix ) for prefix in self ._ignore_lines_starting ]):
1027+ self ._current_line = line
10271028 yield line
10281029
10291030 def parse_line (self , line : Any , report : libmessage .Report ):
@@ -1063,14 +1064,14 @@ def process(self):
10631064 events : list [libmessage .Event ] = [value ]
10641065 except Exception :
10651066 self .logger .exception ('Failed to parse line.' )
1066- self .__failed .append ((traceback .format_exc (), line ))
1067+ self .__failed .append ((traceback .format_exc (), self . _current_line ))
10671068 else :
10681069 events_count += len (events )
10691070 self .send_message (* events )
10701071
1071- for exc , line in self .__failed :
1072+ for exc , original_line in self .__failed :
10721073 report_dump : libmessage .Message = report .copy ()
1073- report_dump .change ('raw' , self .recover_line (line ))
1074+ report_dump .change ('raw' , self .recover_line (original_line ))
10741075 if self .error_dump_message :
10751076 self ._dump_message (exc , report_dump )
10761077 if self .destination_queues and '_on_error' in self .destination_queues :
@@ -1115,21 +1116,34 @@ def recover_line(self, line: Optional[str] = None) -> str:
11151116 line = line if line else self ._current_line
11161117 return '\n ' .join (tempdata + [line ])
11171118
def recover_line_csv(self, line: Union[list, str, None] = None) -> str:
    """
    Convert a CSV line back to its textual form, prefixed by the saved tempdata.

    Parameters:
        line: Optional line. A list of fields is serialized with csv.writer
            using self._csv_params. A non-empty string is taken as the
            already-serialized line and used verbatim. If absent or empty,
            the current line (self._current_line) is used as string.

    Returns:
        str: The lines of self.tempdata, each terminated by CRLF, followed by
            the recovered line.
    """
    if isinstance(line, str) and line:
        # Passing a string to writerow() would emit every character as its
        # own field, so an unparsed line is kept as it is.
        result = line
    elif line:
        out = io.StringIO()
        writer = csv.writer(out, **self._csv_params)
        writer.writerow(line)
        result = out.getvalue()
    else:
        # _current_line is None until a line has been read; fall back to an
        # empty string so the concatenation below cannot raise a TypeError.
        result = self._current_line or ''
    tempdata = '\r\n'.join(self.tempdata) + '\r\n' if self.tempdata else ''
    return tempdata + result
11241133
def recover_line_csv_dict(self, line: Union[dict, str, None] = None) -> str:
    """
    Converts dictionaries to csv. self.csv_fieldnames must be list of fields.

    The output always starts with the header row built from
    self.csv_fieldnames, followed by the recovered line.

    Parameters:
        line: The line as dictionary (serialized with csv.DictWriter), or as
            string (appended verbatim). If absent, the current line
            (self._current_line) is appended, if there is one.

    Returns:
        str: Header and line as CSV, with surrounding whitespace stripped.
    """
    out = io.StringIO()
    writer = csv.DictWriter(out, self.csv_fieldnames, **self._csv_params)
    writer.writeheader()
    if isinstance(line, dict):
        writer.writerow(line)
    elif isinstance(line, str):
        out.write(line)
    elif self._current_line is not None:
        # _current_line is None until a line has been read; writing None to
        # a StringIO raises a TypeError, so only the header is returned then.
        out.write(self._current_line)

    return out.getvalue().strip()
11351149
@@ -1138,20 +1152,29 @@ def recover_line_json(self, line: dict) -> str:
11381152 Reverse of parse for JSON pulses.
11391153
11401154 Recovers a fully functional report with only the problematic pulse.
1155+ Using a string as input here is not possible, as the input may span over multiple lines.
1156+ Output is not identical to the input, but has the same content.
1157+
1158+ Parameters:
1159+ line: The line as dict.
1160+
1161+ Returns:
1162+ str: The JSON-encoded line as string.
11411163 """
11421164 return json .dumps ([line ])
11431165
def recover_line_json_stream(self, line: Optional[str] = None) -> str:
    """
    recover_line for JSON streams (one JSON element per line, no outer structure),
    just returns the line, unparsed.

    Parameters:
        line: The unparsed line as string, if available. If absent or empty,
            falls back to the original current line (self._current_line).

    Returns:
        str: unparsed JSON line.
    """
    # No re-serialization happens here: the given or remembered text is
    # handed back as it is.
    return line or self._current_line
11551178
11561179
11571180class CollectorBot (Bot ):
0 commit comments