Skip to content

Commit 3baca90

Browse files
kbrajneesh authored and Sebastian Wagner committed
ENH: ignore tabs,spaces in new line or before comment
1 parent 2d52673 commit 3baca90

File tree

3 files changed

+15
-4
lines changed

3 files changed

+15
-4
lines changed

intelmq/bots/parsers/generic/parser_csv.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,12 @@ def init(self):
7070
def parse(self, report):
7171
raw_report = utils.base64_decode(report.get("raw"))
7272
raw_report = raw_report.translate({0: None})
73-
# ignore lines starting with #
74-
raw_report = re.sub(r'(?m)^#.*\n?', '', raw_report)
73+
# ignore lines starting with #. # can have leading spaces/tabs
74+
raw_report = re.sub(r'(?m)^[ \t]*#.*\n?', '', raw_report)
7575
# ignore null bytes
7676
raw_report = re.sub(r'(?m)\0', '', raw_report)
77+
# ignore lines having mix of spaces and tabs only
78+
raw_report = re.sub(r'(?m)^[ \t]*\n?', '', raw_report)
7779
# skip header
7880
if getattr(self.parameters, 'skip_header', False):
7981
self.tempdata.append(raw_report[:raw_report.find('\n')])

intelmq/tests/bots/parsers/generic/sample_report.csv

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
# comment with space before
2+
# comment with tab before
3+
# comment with mix of tabs and spaces before
4+
# below line contains spaces only
5+
6+
# below line contains tabs only
7+
8+
# below line contains mix of tabs and spaces only
9+
110
# nesmysl jak noha
211
2015-12-14 04:19:00 Testing Really bad actor site comment Nothing Unimportant www.cennoworld.com/Payment_Confirmation/Payment_Confirmation.zip 198.105.221.5 mail5.bulls.unisonplatform.com just another comment
312
2016-12-14 04:19:00 Testing Really bad actor site comment Nothing Unimportant www.cennoworld.com/Payment_Confirmation/Payment_Confirmation.zip 198.105.221.161 mail5.bulls.unisonplatform.com just another comment

intelmq/tests/bots/parsers/generic/test_parser_csv.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,13 @@
2727
"source.fqdn": "mail5.bulls.unisonplatform.com",
2828
"event_description.text": "Really bad actor site comment",
2929
"classification.type": "malware",
30-
"raw": utils.base64_encode(SAMPLE_SPLIT[1].replace('\t', ',')+'\r\n'),
30+
"raw": utils.base64_encode(SAMPLE_SPLIT[10].replace('\t', ',')+'\r\n'),
3131
"time.observation": "2015-01-01T00:00:00+00:00",
3232
}
3333
EXAMPLE_EVENT2 = EXAMPLE_EVENT.copy()
3434
EXAMPLE_EVENT2['time.source'] = "2016-12-14T04:19:00+00:00"
3535
EXAMPLE_EVENT2['source.ip'] = "198.105.221.161"
36-
EXAMPLE_EVENT2["raw"] = utils.base64_encode(SAMPLE_SPLIT[2].replace('\t', ',')+'\r\n')
36+
EXAMPLE_EVENT2["raw"] = utils.base64_encode(SAMPLE_SPLIT[11].replace('\t', ',')+'\r\n')
3737

3838

3939
class TestGenericCsvParserBot(test.BotTestCase, unittest.TestCase):

0 commit comments

Comments
 (0)