From 316af6b03e77aae61921b771951ca369d8d5373f Mon Sep 17 00:00:00 2001 From: Ladislav Baco Date: Thu, 24 Apr 2025 01:05:21 +0200 Subject: [PATCH 1/7] Add TAXII Collector bot and STIX Parser bot As a bare minimum, TAXII Collector currently collects only the objects of type indicator. These objects contain information about indicators and the detection patterns, e.g. in stix, pcre, sigma, snort, suricata, yara format. The pattern, pattern_type and valid_from properties are required, while confidence, description and labels are only optional properties. However, they are present in several TAXII feeds and could be used to determine classification.taxonomy and classification.type even without processing the relationships of the indicators (e.g. indicator indicates malware) STIX Parser is currently capable of parsing objects of type indicator (usually retrieved from the TAXII Collector). From the indicator objects, it extracts the detection pattern (currently only the single Observation Expressions in STIX format are supported). It supports IP addresses, Domains and URLs indicator values. 
Moreover, this parser also attempts to extract some optional properties of STIX objects such as description and labels, which can be useful for further classification of the event with the Expert Bots. TAXII Collector tests for missing parameters and mocks the simple TAXII server providing minimal collection with simple indicator object. STIX Parser tests for indicator patterns parsing. Improvements based on @sebix comments, collection title used as feed.code. Fix codestyle in TAXII and STIX bots. Fix Python 3.8 support in STIX Parser bot --- .../bots/collectors/taxii/REQUIREMENTS.txt | 4 + intelmq/bots/collectors/taxii/__init__.py | 0 intelmq/bots/collectors/taxii/collector.py | 64 ++++++++++ intelmq/bots/parsers/stix/__init__.py | 0 intelmq/bots/parsers/stix/parser.py | 79 +++++++++++++ .../collectors/taxii/test_collector_bot.py | 108 +++++++++++++++++ .../bots/parsers/stix/test_parser_bot.py | 110 ++++++++++++++++++ 7 files changed, 365 insertions(+) create mode 100644 intelmq/bots/collectors/taxii/REQUIREMENTS.txt create mode 100644 intelmq/bots/collectors/taxii/__init__.py create mode 100644 intelmq/bots/collectors/taxii/collector.py create mode 100644 intelmq/bots/parsers/stix/__init__.py create mode 100644 intelmq/bots/parsers/stix/parser.py create mode 100644 intelmq/tests/bots/collectors/taxii/test_collector_bot.py create mode 100644 intelmq/tests/bots/parsers/stix/test_parser_bot.py diff --git a/intelmq/bots/collectors/taxii/REQUIREMENTS.txt b/intelmq/bots/collectors/taxii/REQUIREMENTS.txt new file mode 100644 index 0000000000..06a4889bdd --- /dev/null +++ b/intelmq/bots/collectors/taxii/REQUIREMENTS.txt @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: 2025 Ladislav Baco +# SPDX-License-Identifier: AGPL-3.0-or-later + +taxii2-client>=2.3.0 diff --git a/intelmq/bots/collectors/taxii/__init__.py b/intelmq/bots/collectors/taxii/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/intelmq/bots/collectors/taxii/collector.py 
"""
SPDX-FileCopyrightText: 2025 Ladislav Baco
SPDX-License-Identifier: AGPL-3.0-or-later

Get indicator objects from TAXII server

Configuration parameters: taxii collection (feed) url, username and password.
"""

import datetime
import json
from requests.exceptions import HTTPError

from intelmq.lib.bot import CollectorBot
from intelmq.lib.exceptions import MissingDependencyError

try:
    import taxii2client.v21 as taxii2
except ImportError:
    taxii2 = None


class TaxiiCollectorBot(CollectorBot):
    """Collect STIX objects of type ``indicator`` from one TAXII collection.

    Every collected object is sent as its own report: the JSON-serialised
    object becomes ``raw``, the collection URL becomes ``feed.url`` and the
    collection title becomes ``feed.code``.
    """
    collection: str = None
    username: str = None
    password: str = None
    rate_limit: int = 3600
    # size of the look-back window in seconds: only objects added to the
    # collection after "now - time_delta" are requested
    time_delta: int = 3600

    def init(self):
        """Validate the configuration and create the TAXII collection client.

        Raises:
            MissingDependencyError: if ``taxii2-client`` is not installed.
            ValueError: if collection, username or password is not configured.
        """
        if taxii2 is None:
            raise MissingDependencyError('taxii2-client')

        if self.collection is None:
            raise ValueError('No TAXII collection URL provided.')
        if self.username is None:
            raise ValueError('No TAXII username provided.')
        if self.password is None:
            raise ValueError('No TAXII password provided.')

        self._date_after = self._window_start()

        self._taxii_collection = taxii2.Collection(self.collection, user=self.username, password=self.password)

    def _window_start(self):
        """Return the timezone-aware UTC start of the current look-back window."""
        # An aware UTC value is unambiguous. A naive local time would be off
        # by the host's UTC offset if the client assumes UTC for naive values.
        now = datetime.datetime.now(datetime.timezone.utc)
        return now - datetime.timedelta(seconds=int(self.time_delta))

    def process(self):
        """Fetch the indicators added inside the window and send one report each."""
        # Re-anchor the window on every run. Computing it only once in init()
        # made each later run request an ever-growing period and re-send all
        # objects that had already been collected.
        self._date_after = self._window_start()
        try:
            title = self._taxii_collection.title
            self.logger.info('Collection title: %r.', title)

            # get the indicator objects
            envelope = self._taxii_collection.get_objects(added_after=self._date_after, type='indicator')
            for obj in envelope.get('objects', []):
                report = self.new_report()
                report.add('raw', json.dumps(obj))
                report.add('feed.url', self.collection)
                report.add('feed.code', title)
                self.send_message(report)

        except HTTPError as e:
            self.logger.error('Connection error: %r!', e)


BOT = TaxiiCollectorBot
"""
SPDX-FileCopyrightText: 2025 Ladislav Baco
SPDX-License-Identifier: AGPL-3.0-or-later

Parse indicator objects in STIX format received from TAXII collector
"""

import json
import logging

from intelmq.lib.bot import ParserBot

# parse_stix_pattern() is a staticmethod and has no access to the bot's own
# logger, so it reports through a module-level logger instead of print().
_logger = logging.getLogger(__name__)


class StixParserBot(ParserBot):
    """Parse STIX indicators"""
    parse = ParserBot.parse_json_stream
    recover_line = ParserBot.recover_line_json_stream

    def parse_line(self, line, report):
        """Parse one STIX object of indicator type.

        Yields one event for an ``indicator`` object whose ``pattern_type`` is
        ``stix``. Objects of another type, or indicators with another pattern
        type, are logged with a warning and produce no event.
        """
        object_type = line.get('type', '')
        if object_type != 'indicator':
            self.logger.warning('Unexpected type of STIX object: %r', object_type)
            return

        event = self.new_event(report)
        event.add('raw', json.dumps(line))
        event.add('comment', line.get('description', ''))
        event.add('extra.labels', line.get('labels', None))
        event.add('time.source', line.get('valid_from', '1970-01-01T00:00:00Z'))
        # classification will be determined by expert bot specific for given TAXII collection
        event.add('classification.type', 'undetermined')

        pattern = line.get('pattern', '')
        # stix, pcre, sigma, snort, suricata, yara
        pattern_type = line.get('pattern_type', '')
        if pattern_type != 'stix':
            self.logger.warning('Unexpected type of pattern expression: %r, pattern: %r', pattern_type, pattern)
            return

        indicator = self.parse_stix_pattern(pattern)
        if indicator:
            event.add(indicator[0], indicator[1])
        self.parse_vendor_specific(event, line, report)
        yield event

    def parse_vendor_specific(self, event, line, report):
        """
        Parse vendor specific details from the STIX 2.1 Indicator object.
        This method by default does nothing and it is called just before IntelMQ event is yielded.
        If we need vendor-specific STIX parser, we can inherit from this class and override this one method.
        """
        return

    @staticmethod
    def _host_or_network(value, single_host_suffix):
        """Map an address to ``source.ip`` or, if it is a real CIDR range, ``source.network``."""
        # strip CIDR if the network contains a single host only
        if value.endswith(single_host_suffix):
            value = value[:-len(single_host_suffix)]
        if '/' in value:
            return ('source.network', value)
        return ('source.ip', value)

    @staticmethod
    def parse_stix_pattern(pattern):
        """
        Extract the indicator from a single STIX Observation Expression.

        Supports ``url``, ``domain-name``, ``ipv4-addr`` and ``ipv6-addr``
        values and returns a ``(field name, value)`` tuple. Returns None when
        the pattern has more than one Observation Expression, carries no
        quoted string literal, or describes an unsupported object type.

        STIX Patterning:
        https://docs.oasis-open.org/cti/stix/v2.1/os/stix-v2.1-os.html#_e8slinrhxcc9
        """
        if pattern.count('[') != 1:
            _logger.warning('Unsupported Pattern Expression. Only single Observation Expression is supported. '
                            'Pattern: %s', pattern)
            return None

        # The value is the first single-quoted literal. Patterns without one
        # (e.g. numeric comparisons) used to raise IndexError here.
        parts = pattern.split("'")
        if len(parts) < 3:
            return None
        value = parts[1]

        if pattern.startswith('[url:value = '):
            return ('source.url', value)
        if pattern.startswith('[domain-name:value = '):
            return ('source.fqdn', value)
        if pattern.startswith('[ipv4-addr:value = '):
            # remove port, sometimes the port is present in ETI
            value = value.split(':')[0]
            return StixParserBot._host_or_network(value, '/32')
        if pattern.startswith('[ipv6-addr:value = '):
            return StixParserBot._host_or_network(value, '/128')
        return None


BOT = StixParserBot
# SPDX-FileCopyrightText: 2016 Sebastian Wagner, 2025 Ladislav Baco
#
# SPDX-License-Identifier: AGPL-3.0-or-later

# -*- coding: utf-8 -*-
"""
Test with reports, based on intelmq/tests/lib/test_collector_bot.py
"""
import unittest

import re
import requests_mock

import intelmq.lib.bot as bot
import intelmq.lib.test as test
from intelmq.bots.collectors.taxii.collector import TaxiiCollectorBot


EXAMPLE_REPORT = {
    '__type': 'Report',
    'feed.name': 'Taxii Feed',
    'feed.code': 'feed stix2.1',
    'feed.provider': 'Taxii Provider',
    'feed.documentation': 'Taxii Documentation',
    'feed.accuracy': 100.0,
    'feed.url': 'http://localhost/feed',
    'raw': 'eyJpZCI6ICJpbmRpY2F0b3ItLTAiLCAidHlwZSI6ICJpbmRpY2F0b3IiLCAic3BlY192ZXJzaW9uIjogIjIuMSIsICJjcmVhdGVkIjogIjE5NzAtMDEtMDFUMDA6MDA6MDAuMDAwWiIsICJtb2RpZmllZCI6ICIxOTcwLTAxLTAxVDAwOjAwOjAwLjAwMFoiLCAicGF0dGVybiI6ICJbdXJsOnZhbHVlID0gJ2h0dHA6Ly9leGFtcGxlLm9yZyddIiwgInBhdHRlcm5fdHlwZSI6ICJzdGl4IiwgInZhbGlkX2Zyb20iOiAiMTk3MC0wMS0wMVQwMDowMDowMFoifQ==',
}

# Pieces of the fake TAXII 2.1 server shared by both mocked endpoints.
_TAXII_HEADERS = {'Content-Type': 'application/taxii+json;version=2.1'}
_COLLECTION_INFO = {
    'id': 'feed',
    'title': 'feed stix2.1',
    'can_read': True,
    'can_write': False,
}
_INDICATOR = {
    'id': 'indicator--0',
    'type': 'indicator',
    'spec_version': '2.1',
    'created': '1970-01-01T00:00:00.000Z',
    'modified': '1970-01-01T00:00:00.000Z',
    'pattern': "[url:value = 'http://example.org']",
    'pattern_type': 'stix',
    'valid_from': '1970-01-01T00:00:00Z',
}


def prepare_mocker(mocker):
    """Register the collection endpoint and its objects endpoint on the mocker."""
    mocker.get('http://localhost/feed/', json=_COLLECTION_INFO, headers=_TAXII_HEADERS)
    objects_envelope = dict(_COLLECTION_INFO, more=False, objects=[_INDICATOR])
    mocker.get(re.compile('http://localhost/feed/objects/.*'), json=objects_envelope, headers=_TAXII_HEADERS)


@test.skip_exotic()
@requests_mock.Mocker()
class TestTaxiiCollectorBot(test.BotTestCase, unittest.TestCase):
    """
    A TestCase for a TaxiiCollectorBot.
    """

    @classmethod
    def set_bot(cls):
        cls.bot_reference = TaxiiCollectorBot
        cls.sysconfig = {
            'name': 'Taxii Feed',
            'provider': 'Taxii Provider',
            'documentation': 'Taxii Documentation',
            'collection': 'http://localhost/feed',
            'username': 'user',
            'password': 'pass',
        }

    def _assert_missing_parameter(self, parameter, message):
        """Run the bot with one parameter unset and check the ValueError text."""
        with self.assertRaises(ValueError) as context:
            self.run_bot(parameters={parameter: None})
        self.assertEqual(str(context.exception), message)

    def test_event(self, mocker):
        """ Test if correct Event has been produced. """
        prepare_mocker(mocker)
        self.run_bot()
        self.assertMessageEqual(0, EXAMPLE_REPORT)

    def test_missing_collection(self, mocker):
        """ Test if missing collection is detected. """
        self._assert_missing_parameter('collection', 'No TAXII collection URL provided.')

    def test_missing_username(self, mocker):
        """ Test if missing username is detected. """
        self._assert_missing_parameter('username', 'No TAXII username provided.')

    def test_missing_password(self, mocker):
        """ Test if missing password is detected. """
        self._assert_missing_parameter('password', 'No TAXII password provided.')


if __name__ == '__main__':  # pragma: no cover
    unittest.main()
# SPDX-FileCopyrightText: 2025 Ladislav Baco
#
# SPDX-License-Identifier: AGPL-3.0-or-later

# -*- coding: utf-8 -*-
"""
Test with example reports (STIX objects usually collected from TAXII server)
"""
import unittest

import re
import requests_mock

import intelmq.lib.bot as bot
import intelmq.lib.test as test
from intelmq.bots.parsers.stix.parser import StixParserBot


EXAMPLE_REPORT = {'__type': 'Report',
                  'feed.name': 'Taxii Feed',
                  'feed.code': 'feed stix2.1',
                  'feed.provider': 'Taxii Provider',
                  'feed.documentation': 'Taxii Documentation',
                  'feed.accuracy': 100.0,
                  'feed.url': 'http://localhost/feed',
                  'raw': 'eyJpZCI6ICJpbmRpY2F0b3ItLTAiLCAidHlwZSI6ICJpbmRpY2F0b3IiLCAic3BlY192ZXJzaW9uIjogIjIuMSIsICJjcmVhdGVkIjogIjE5NzAtMDEtMDFUMDA6MDA6MDAuMDAwWiIsICJtb2RpZmllZCI6ICIxOTcwLTAxLTAxVDAwOjAwOjAwLjAwMFoiLCAicGF0dGVybiI6ICJbdXJsOnZhbHVlID0gJ2h0dHA6Ly9leGFtcGxlLm9yZyddIiwgInBhdHRlcm5fdHlwZSI6ICJzdGl4IiwgInZhbGlkX2Zyb20iOiAiMTk3MC0wMS0wMVQwMDowMDowMFoifQ=='
                  }

EXAMPLE_EVENT = {'__type': 'Event',
                 'feed.name': 'Taxii Feed',
                 'feed.code': 'feed stix2.1',
                 'feed.provider': 'Taxii Provider',
                 'feed.documentation': 'Taxii Documentation',
                 'feed.accuracy': 100.0,
                 'feed.url': 'http://localhost/feed',
                 'source.url': 'http://example.org',
                 'time.source': '1970-01-01T00:00:00+00:00',
                 'classification.type': 'undetermined',
                 'raw': 'eyJpZCI6ICJpbmRpY2F0b3ItLTAiLCAidHlwZSI6ICJpbmRpY2F0b3IiLCAic3BlY192ZXJzaW9uIjogIjIuMSIsICJjcmVhdGVkIjogIjE5NzAtMDEtMDFUMDA6MDA6MDAuMDAwWiIsICJtb2RpZmllZCI6ICIxOTcwLTAxLTAxVDAwOjAwOjAwLjAwMFoiLCAicGF0dGVybiI6ICJbdXJsOnZhbHVlID0gJ2h0dHA6Ly9leGFtcGxlLm9yZyddIiwgInBhdHRlcm5fdHlwZSI6ICJzdGl4IiwgInZhbGlkX2Zyb20iOiAiMTk3MC0wMS0wMVQwMDowMDowMFoifQ=='
                 }


class TestStixParserBot(test.BotTestCase, unittest.TestCase):
    """
    A TestCase for a StixParserBot.
    """

    @classmethod
    def set_bot(cls):
        cls.bot_reference = StixParserBot
        cls.sysconfig = {}

    def test_event(self):
        """ Test if correct Event has been produced. """
        self.input_message = EXAMPLE_REPORT
        self.run_bot()
        self.assertMessageEqual(0, EXAMPLE_EVENT)

    def test_pattern_url(self):
        """ Test if url pattern is parsed. """
        indicator = self.bot_reference.parse_stix_pattern("[url:value = 'http://example.org']")
        self.assertEqual(str(indicator[0]), 'source.url')
        self.assertEqual(str(indicator[1]), 'http://example.org')

    # This test used to be a second "test_pattern_url". The later definition
    # replaced the earlier one in the class body, so the URL test never ran.
    def test_pattern_domain(self):
        """ Test if domain pattern is parsed. """
        indicator = self.bot_reference.parse_stix_pattern("[domain-name:value = 'example.org']")
        self.assertEqual(str(indicator[0]), 'source.fqdn')
        self.assertEqual(str(indicator[1]), 'example.org')

    def test_pattern_ipv4(self):
        """ Test if ipv4 pattern is parsed. """
        indicator = self.bot_reference.parse_stix_pattern("[ipv4-addr:value = '127.0.0.1']")
        self.assertEqual(str(indicator[0]), 'source.ip')
        self.assertEqual(str(indicator[1]), '127.0.0.1')

    def test_pattern_ipv4_cidr(self):
        """ Test if ipv4 cidr pattern is parsed. """
        indicator = self.bot_reference.parse_stix_pattern("[ipv4-addr:value = '127.0.0.0/8']")
        self.assertEqual(str(indicator[0]), 'source.network')
        self.assertEqual(str(indicator[1]), '127.0.0.0/8')

    def test_pattern_ipv4_cidr_single_host(self):
        """ Test if ipv4 cidr with single host pattern is parsed. """
        indicator = self.bot_reference.parse_stix_pattern("[ipv4-addr:value = '127.0.0.1/32']")
        self.assertEqual(str(indicator[0]), 'source.ip')
        self.assertEqual(str(indicator[1]), '127.0.0.1')

    def test_pattern_ipv6(self):
        """ Test if ipv6 pattern is parsed. """
        indicator = self.bot_reference.parse_stix_pattern("[ipv6-addr:value = '::1']")
        self.assertEqual(str(indicator[0]), 'source.ip')
        self.assertEqual(str(indicator[1]), '::1')

    def test_pattern_ipv6_cidr(self):
        """ Test if ipv6 cidr pattern is parsed. """
        indicator = self.bot_reference.parse_stix_pattern("[ipv6-addr:value = 'fe:80::/10']")
        self.assertEqual(str(indicator[0]), 'source.network')
        self.assertEqual(str(indicator[1]), 'fe:80::/10')

    def test_pattern_ipv6_cidr_single_host(self):
        """ Test if ipv6 cidr with single host pattern is parsed. """
        indicator = self.bot_reference.parse_stix_pattern("[ipv6-addr:value = 'fe:80::1/128']")
        self.assertEqual(str(indicator[0]), 'source.ip')
        self.assertEqual(str(indicator[1]), 'fe:80::1')


if __name__ == '__main__':  # pragma: no cover
    unittest.main()
# SPDX-FileCopyrightText: 2025 Ladislav Baco
#
# SPDX-License-Identifier: AGPL-3.0-or-later

# -*- coding: utf-8 -*-
"""
Parser bot for ESET Threat Intelligence feeds
This bot parses STIX Indicators objects received from TAXII collector
Then it analyzes event's comments based on STIX indicator's description
and it adds classification.type and malware family info
It is recommended to apply TaxonomyExpertBot then to map the taxonomy
"""

import re

from intelmq.bots.parsers.stix.parser import StixParserBot


# Descriptions that map to a classification as a whole (exact match).
CLASSIFICATION_BY_STRING = {
    'Host actively distributes high-severity malicious content in the form of executable code.': 'malware-distribution',
    'Host actively distributes high-severity threat in the form of executable code.': 'malware-distribution',
    'Host actively distributes high-severity threat in the form of malicious code.': 'malware-distribution',
    'Host actively distributes high-severity threat in the form of script code.': 'malware-distribution',
    'Host is known to be actively distributing adware or other medium-risk software.': 'malware-distribution',
    'Host is known to be actively distributing high-severity mobile threats or low-risk software.': 'other',
    'Host is known to be actively distributing threats or is of uncertain reputation.': 'other',
    'Host is known to be distributing low-risk and potentially unwanted content.': 'other',
    'Host actively distributes potentially unwanted or unsafe threat.': 'other',
    'Host is known source of phishing or other fraudulent content.': 'phishing',
    'Host is known source of active fraudulent content.': 'other',
    'Host is used as command and control server.': 'c2-server',
    'Web services scanning and attacks': 'scanner',
    'RDP bruteforce IP': 'brute-force',
    'SQL bruteforce IP': 'brute-force',
    'SMB bruteforce IP': 'brute-force',
    'MySQL bruteforce IP': 'brute-force',
    'FTP bruteforce IP': 'brute-force',
}

# Descriptions that embed a detection name; the first capture group is the
# malware string. Order matters: classify() returns on the first match.
# "(?:.* )?" is an optional free-text prefix. The previous "[.* ]?" was a
# character class that matched at most one '.', '*' or space, so a comment
# with leading words never matched these rules.
CLASSIFICATION_REGEX = {
    r'C&C indicates that a botnet ([^ ]+) ([^ ]+) is present\.': 'c2-server',
    r'C&C of ([^ ]+) ([^ ]+)': 'c2-server',
    r'Host is used as command and control server of ([^ ]+) ([^ ]+) malware family\.': 'c2-server',
    r'Each of these file hashes indicates that a variant of ([^ ]+) ([^ ]+) is present\.': 'malware',
    r'^(?:.* )?([^ ]+) C&C server.*$': 'c2-server',
    r'^(?:.* )?([^ ]+) backdoor.*$': 'malware',
    r'^(?:.* )?([^ ]+) trojan.*$': 'malware',
    r'^(?:.* )?([^ ]+) implant.*$': 'malware',
    r'Loader for ([^ ]+).*$': 'malware',
}

# Same rules with the patterns compiled once at import time.
CLASSIFICATION_BY_REGEX = {
    re.compile(regex): classification_type
    for regex, classification_type in CLASSIFICATION_REGEX.items()
}


class ESETStixParserBot(StixParserBot):
    """Add classification.type and malware family to events"""

    # Platform/Type.Family.Variant!Suffixes
    # Type and suffixes are optional
    _malware_naming_convention_pattern = re.compile(r'^([^/]*/)?([^\.]*\.)?([^\.]+)(\.[^!]*)(!.*)?$')

    def parse_vendor_specific(self, event, line, report):
        """Classify an event that is still ``undetermined`` from its comment.

        An event that already carries another classification.type is left
        untouched.
        """
        if event.get('classification.type', 'undetermined') != 'undetermined':
            # classification.type already present, do not change it
            return

        classification_type, malware_name = self.classify(event.get('comment', ''))
        event.add('classification.type', classification_type, overwrite=True)
        if malware_name:
            event.add('malware.name', malware_name)

    @staticmethod
    def classify(comment):
        """ Classify comment and returns (classification_type, malware_name)

        The exact-string table is consulted first, then the regex rules in
        their definition order. Returns ``('undetermined', None)`` when
        nothing matches. ``malware_name`` is None unless a regex rule
        captured a detection name.
        """
        # surrounding whitespace must not defeat the exact-string lookup
        comment = (comment or '').strip()

        classification_type = CLASSIFICATION_BY_STRING.get(comment)
        if classification_type:
            return (classification_type, None)

        for pattern, classification_type in CLASSIFICATION_BY_REGEX.items():
            match = pattern.match(comment)
            if not match:
                continue
            groups = match.groups()
            malware_name = None
            if groups and groups[0]:
                malware_name = ESETStixParserBot.extract_malware_family(groups[0])
            return (classification_type, malware_name)

        return ('undetermined', None)

    @staticmethod
    def extract_malware_family(malware):
        """ Extract malware family from the threat detection string

        ``Win32/Spy.LummaStealer.B`` gives ``lummastealer``. A string that
        does not follow the naming convention is returned lowercased as is.
        """
        match = ESETStixParserBot._malware_naming_convention_pattern.match(malware)
        # group 3 is the family; on no match assume the string is already
        # just the malware family (or an unknown naming convention)
        malware_name = match.group(3) if match else malware

        # IntelMQ malware.name should be lowercase
        return malware_name.lower()


BOT = ESETStixParserBot
'eyJpZCI6ICJpbmRpY2F0b3ItLTAiLCAidHlwZSI6ICJpbmRpY2F0b3IiLCAic3BlY192ZXJzaW9uIjogIjIuMSIsICJjcmVhdGVkIjogIjE5NzAtMDEtMDFUMDA6MDA6MDAuMDAwWiIsICJtb2RpZmllZCI6ICIxOTcwLTAxLTAxVDAwOjAwOjAwLjAwMFoiLCAicGF0dGVybiI6ICJbdXJsOnZhbHVlID0gJ2h0dHA6Ly9leGFtcGxlLm9yZyddIiwgInBhdHRlcm5fdHlwZSI6ICJzdGl4IiwgInZhbGlkX2Zyb20iOiAiMTk3MC0wMS0wMVQwMDowMDowMFoifQ==' - } + 'feed.name': 'Taxii Feed', + 'feed.code': 'feed stix2.1', + 'feed.provider': 'Taxii Provider', + 'feed.documentation': 'Taxii Documentation', + 'feed.accuracy': 100.0, + 'feed.url': 'http://localhost/feed', + 'source.url': 'http://example.org', + 'time.source': '1970-01-01T00:00:00+00:00', + 'classification.type': 'undetermined', + 'raw': 'eyJpZCI6ICJpbmRpY2F0b3ItLTAiLCAidHlwZSI6ICJpbmRpY2F0b3IiLCAic3BlY192ZXJzaW9uIjogIjIuMSIsICJjcmVhdGVkIjogIjE5NzAtMDEtMDFUMDA6MDA6MDAuMDAwWiIsICJtb2RpZmllZCI6ICIxOTcwLTAxLTAxVDAwOjAwOjAwLjAwMFoiLCAicGF0dGVybiI6ICJbdXJsOnZhbHVlID0gJ2h0dHA6Ly9leGFtcGxlLm9yZyddIiwgInBhdHRlcm5fdHlwZSI6ICJzdGl4IiwgInZhbGlkX2Zyb20iOiAiMTk3MC0wMS0wMVQwMDowMDowMFoifQ==' + } class TestStixParserBot(test.BotTestCase, unittest.TestCase): @@ -56,7 +56,6 @@ def test_event(self): self.run_bot() self.assertMessageEqual(0, EXAMPLE_EVENT) - def test_pattern_url(self): """ Test if url pattern is parsed. 
""" indicator = self.bot_reference.parse_stix_pattern("[url:value = 'http://example.org']") diff --git a/intelmq/tests/bots/parsers/stix/test_parser_eset_bot.py b/intelmq/tests/bots/parsers/stix/test_parser_eset_bot.py new file mode 100644 index 0000000000..d40f07a3f4 --- /dev/null +++ b/intelmq/tests/bots/parsers/stix/test_parser_eset_bot.py @@ -0,0 +1,125 @@ +# SPDX-FileCopyrightText: 2025 Ladislav Baco +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- +""" +Test with example reports (STIX objects usually collected from TAXII server) +""" +import unittest + +import re +import requests_mock + +import intelmq.lib.bot as bot +import intelmq.lib.test as test +from intelmq.bots.parsers.stix.parser_eset import ESETStixParserBot + + +EXAMPLE_REPORT = {'__type': 'Report', + 'feed.name': 'Botnet feed', + 'feed.code': 'botnet stix 2.1', + 'feed.provider': 'ESET', + 'feed.documentation': 'https://help.eset.com/eti_portal/en-US/botnet-feed.', + 'feed.accuracy': 100.0, + 'feed.url': 'https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/0abb06690b0b47e49cd7794396b76b20/', + 'raw': 'eyJpZCI6ICJpbmRpY2F0b3ItLTAiLCAidHlwZSI6ICJpbmRpY2F0b3IiLCAic3BlY192ZXJzaW9uIjogIjIuMSIsICJjcmVhdGVkIjogIjE5NzAtMDEtMDFUMDA6MDA6MDAuMDAwWiIsICJtb2RpZmllZCI6ICIxOTcwLTAxLTAxVDAwOjAwOjAwLjAwMFoiLCAicGF0dGVybiI6ICJbdXJsOnZhbHVlID0gJ2h0dHA6Ly9leGFtcGxlLm9yZyddIiwgInBhdHRlcm5fdHlwZSI6ICJzdGl4IiwgInZhbGlkX2Zyb20iOiAiMTk3MC0wMS0wMVQwMDowMDowMFoiLCAiZGVzY3JpcHRpb24iOiAiQyZDIGluZGljYXRlcyB0aGF0IGEgYm90bmV0IFdpbjMyL1NweS5MdW1tYVN0ZWFsZXIuQiB0cm9qYW4gaXMgcHJlc2VudC4iLCAibGFiZWxzIjogWyJtYWxpY2lvdXMtYWN0aXZpdHkiXX0=' + } + +EXAMPLE_EVENT = {'__type': 'Event', + 'feed.name': 'Botnet feed', + 'feed.code': 'botnet stix 2.1', + 'feed.provider': 'ESET', + 'feed.documentation': 'https://help.eset.com/eti_portal/en-US/botnet-feed.', + 'feed.accuracy': 100.0, + 'feed.url': 
# SPDX-FileCopyrightText: 2025 Ladislav Baco
#
# SPDX-License-Identifier: AGPL-3.0-or-later

# -*- coding: utf-8 -*-
"""
Test with example reports (STIX objects usually collected from TAXII server)
"""
import unittest

import re
import requests_mock

import intelmq.lib.bot as bot
import intelmq.lib.test as test
from intelmq.bots.parsers.stix.parser_eset import ESETStixParserBot


EXAMPLE_REPORT = {
    '__type': 'Report',
    'feed.name': 'Botnet feed',
    'feed.code': 'botnet stix 2.1',
    'feed.provider': 'ESET',
    'feed.documentation': 'https://help.eset.com/eti_portal/en-US/botnet-feed.',
    'feed.accuracy': 100.0,
    'feed.url': 'https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/0abb06690b0b47e49cd7794396b76b20/',
    'raw': 'eyJpZCI6ICJpbmRpY2F0b3ItLTAiLCAidHlwZSI6ICJpbmRpY2F0b3IiLCAic3BlY192ZXJzaW9uIjogIjIuMSIsICJjcmVhdGVkIjogIjE5NzAtMDEtMDFUMDA6MDA6MDAuMDAwWiIsICJtb2RpZmllZCI6ICIxOTcwLTAxLTAxVDAwOjAwOjAwLjAwMFoiLCAicGF0dGVybiI6ICJbdXJsOnZhbHVlID0gJ2h0dHA6Ly9leGFtcGxlLm9yZyddIiwgInBhdHRlcm5fdHlwZSI6ICJzdGl4IiwgInZhbGlkX2Zyb20iOiAiMTk3MC0wMS0wMVQwMDowMDowMFoiLCAiZGVzY3JpcHRpb24iOiAiQyZDIGluZGljYXRlcyB0aGF0IGEgYm90bmV0IFdpbjMyL1NweS5MdW1tYVN0ZWFsZXIuQiB0cm9qYW4gaXMgcHJlc2VudC4iLCAibGFiZWxzIjogWyJtYWxpY2lvdXMtYWN0aXZpdHkiXX0=',
}

# The expected event keeps every feed.* field and the raw payload of the report.
EXAMPLE_EVENT = dict(EXAMPLE_REPORT, **{
    '__type': 'Event',
    'source.url': 'http://example.org',
    'time.source': '1970-01-01T00:00:00+00:00',
    'classification.type': 'c2-server',
    'malware.name': 'lummastealer',
    'comment': 'C&C indicates that a botnet Win32/Spy.LummaStealer.B trojan is present.',
    'extra.labels': ['malicious-activity'],
})


class TestESETStixParserBot(test.BotTestCase, unittest.TestCase):
    """
    A TestCase for an ESETStixParserBot.
    """

    @classmethod
    def set_bot(cls):
        cls.bot_reference = ESETStixParserBot
        cls.sysconfig = {}

    def test_event(self):
        """ Test if correct Event has been produced. """
        self.input_message = EXAMPLE_REPORT
        self.run_bot()
        self.assertMessageEqual(0, EXAMPLE_EVENT)

    def test_classification_by_string(self):
        """ Test if correct classification based on string is returned. """
        expectations = (
            ('Host actively distributes high-severity malicious content in the form of executable code.', 'malware-distribution'),
            ('Host is known source of phishing or other fraudulent content.', 'phishing'),
            ('Host is used as command and control server.', 'c2-server'),
            ('Web services scanning and attacks', 'scanner'),
            ('RDP bruteforce IP', 'brute-force'),
        )
        for comment, expected_type in expectations:
            classification_type, malware_name = self.bot_reference.classify(comment)
            self.assertEqual(str(classification_type), expected_type)
            self.assertEqual(malware_name, None)

    def test_classification_by_regex(self):
        """ Test if correct classification based on regex is returned. """
        expectations = (
            ('C&C indicates that a botnet Win32/Spy.LummaStealer.B trojan is present.', 'c2-server', 'lummastealer'),
            ('C&C of Win32/Spy.LummaStealer.B trojan', 'c2-server', 'lummastealer'),
            ('Host is used as command and control server of Win32/Emotet.BN trojan malware family.', 'c2-server', 'emotet'),
            ('WizardNet backdoor.', 'malware', 'wizardnet'),
            ('Loader for Emotet', 'malware', 'emotet'),
        )
        for comment, expected_type, expected_name in expectations:
            classification_type, malware_name = self.bot_reference.classify(comment)
            self.assertEqual(str(classification_type), expected_type)
            self.assertEqual(str(malware_name), expected_name)

    def test_unknown_classification(self):
        """ Test if undetermined classification is returned when comment contains something unexpected. """
        classification_type, malware_name = self.bot_reference.classify('Example of unexpected comment.')
        self.assertEqual(str(classification_type), 'undetermined')
        self.assertEqual(malware_name, None)

    def test_malware_family_name_extraction(self):
        """ Test if correct malware family name is extracted from the given malware string. """
        expectations = (
            ('Win32/Spy.LummaStealer.B', 'lummastealer'),
            ('Win32/Rescoms.B', 'rescoms'),
            ('Emotet', 'emotet'),
        )
        for detection, expected_name in expectations:
            malware_name = self.bot_reference.extract_malware_family(detection)
            self.assertEqual(str(malware_name), expected_name)


if __name__ == '__main__':  # pragma: no cover
    unittest.main()
+ additional_information: + documentation: https://help.eset.com/eti_portal/en-US/botnet-feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/0abb06690b0b47e49cd7794396b76b20/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: + Botnet C&C: + description: Subset of a Botnet feed, provides information about URLs of Command and Control (C&C) servers and associated data. + additional_information: + documentation: https://help.eset.com/eti_portal/en-US/cc-feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/d1923a526e8f400dbb301259240ee3d5/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: + Botnet Target: + description: Subset of a Botnet feed, provides information about the targets. + additional_information: + documentation: https://help.eset.com/eti_portal/en-US/target-feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/61b6e4f9153e411ca7a9982a2c6ae788/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: + Cryptoscam: + description: Subset of scam domains and URLs that contain targeted information about the current and prevalent crypto scam domains, URLs, and associated data. 
+ additional_information: + documentation: https://help.eset.com/eti_portal/en-US/cryptoscam_feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/2c183ce9551a43338c6cc2ed7c2a704d/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: + Domain: + description: The feed covers the domain name, the data associated with it, and respective malicious activity. + additional_information: + documentation: https://help.eset.com/eti_portal/en-US/domain-feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/a34aa0a4f9de419582a883863503f9c4/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: + IP: + description: Current and prevalent malicious and abusive IPs and some data associated with them. + additional_information: + documentation: https://help.eset.com/eti_portal/en-US/ip-feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/baaed2a92335418aa753fe944e13c23a/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: + Phishing URL: + description: Phishing URLs direct recipients to fake websites and attempt to entice them into divulging sensitive data such as login credentials or financial information. 
+ additional_information: + documentation: https://help.eset.com/eti_portal/en-US/phishing_url_feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/d0a6c0f962dd4dd2b3eeb96b18612584/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: + Scam URL: + description: This feed covers fraudulent electronic shops, investment scams, dating scams. + additional_information: + documentation: https://help.eset.com/eti_portal/en-US/scam_url_feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/2130adc3c67c43f9a3664b187931375e/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: + Smishing: + description: The Smishing feed works exactly the same as the SMS Scam feed except that the fraudulent activity utilizes smishing. + additional_information: + documentation: https://help.eset.com/eti_portal/en-US/smishing_feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/330ad7d0c736476babe5e49077b96c95/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: + SMS scam: + description: This feed contains targeted information about the current and prevalent SMS scam domains, URLs, and associated data. 
+ additional_information: + documentation: https://help.eset.com/eti_portal/en-US/sms_scam_feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/6e20217a2e1246b8ab11be29f759f716/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: + URL: + description: URL feed provides information about current and prevalent malicious URLs and associated data. The feed is created from all URL sources every five minutes, deduplication happens every 24 hours. + additional_information: + documentation: https://help.eset.com/eti_portal/en-US/url-feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/1d3208c143be49da8130f5a66fd3a0fa/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: Shodan: Country Stream: description: Collects the Shodan stream for one or multiple countries from the Shodan API. From 38d323c4aed811e28d2418fd4d2741d4df2dd649 Mon Sep 17 00:00:00 2001 From: Ladislav Baco Date: Thu, 29 May 2025 00:04:37 +0200 Subject: [PATCH 4/7] Improve STIX patterns parsing Use the official STIX2 Pattern Validator to get the comparison expressions and extract simple IoCs from them. Support for URLs, Domains, IPv4, IPv6 and also for MD5, SHA-1 and SHA-256 hashes. Small fixes and workarounds implemented to address certain anomalies in STIX data provided by some vendors (e.g.
ETI) - SHA1 and SHA256 keywords accepted, invalid objects reported as Domains or URLs are dropped without throwing the exceptions --- intelmq/bots/parsers/stix/REQUIREMENTS.txt | 4 + intelmq/bots/parsers/stix/parser.py | 136 +++++++++++++----- .../bots/parsers/stix/test_parser_bot.py | 61 ++++++-- 3 files changed, 154 insertions(+), 47 deletions(-) create mode 100644 intelmq/bots/parsers/stix/REQUIREMENTS.txt diff --git a/intelmq/bots/parsers/stix/REQUIREMENTS.txt b/intelmq/bots/parsers/stix/REQUIREMENTS.txt new file mode 100644 index 0000000000..49f2b3b8cc --- /dev/null +++ b/intelmq/bots/parsers/stix/REQUIREMENTS.txt @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: 2025 Ladislav Baco +# SPDX-License-Identifier: AGPL-3.0-or-later + +stix2-patterns>=2.0.0 diff --git a/intelmq/bots/parsers/stix/parser.py b/intelmq/bots/parsers/stix/parser.py index f8a406f8a1..cda8e3563d 100644 --- a/intelmq/bots/parsers/stix/parser.py +++ b/intelmq/bots/parsers/stix/parser.py @@ -7,34 +7,48 @@ import json + from intelmq.lib.bot import ParserBot +try: + import stix2patterns.v21.pattern as stix2_pattern +except ImportError: + stix2_pattern = None + class StixParserBot(ParserBot): """Parse STIX indicators""" parse = ParserBot.parse_json_stream recover_line = ParserBot.recover_line_json_stream + def init(self): + if stix2_pattern is None: + raise MissingDependencyError('stix2-patterns') + def parse_line(self, line, report): """ Parse one STIX object of indicator type """ object_type = line.get('type', '') if object_type == 'indicator': - event = self.new_event(report) - event.add('raw', json.dumps(line)) - event.add('comment', line.get('description', '')) - event.add('extra.labels', line.get('labels', None)) - event.add('time.source', line.get('valid_from', '1970-01-01T00:00:00Z')) - # classification will be determined by expert bot specific for given TAXII collection - event.add('classification.type', 'undetermined') - pattern = line.get('pattern', '') # stix, pcre, sigma, snort, suricata, 
yara pattern_type = line.get('pattern_type', '') if pattern_type == 'stix': - indicator = self.parse_stix_pattern(pattern) - if indicator: - event.add(indicator[0], indicator[1]) + indicators = StixParserBot.parse_stix_pattern(pattern, self.logger) + for indicator_type, indicator_value in indicators: + event = self.new_event(report) + event.add('raw', json.dumps(line)) + event.add('comment', line.get('description', '')) + event.add('extra.labels', line.get('labels', None)) + event.add('time.source', line.get('valid_from', '1970-01-01T00:00:00Z')) + + # IP address may be passed in Domain feeds or Domain may be passed in URL feeds + # It violates the STIX format, however, in some sources it happens (e.g. in ETI) + # Drop such events without failures and exceptions which slowing down the processing + event.add(indicator_type, indicator_value, raise_failure=False) + + # classification can be overridden by vendor-specific parser below + event.add('classification.type', 'undetermined') self.parse_vendor_specific(event, line, report) yield event else: @@ -51,38 +65,84 @@ def parse_vendor_specific(self, event, line, report): return @staticmethod - def parse_stix_pattern(pattern): + def _get_value_from_comparison_expression(comparison, logger=None): + """ + STIX Comparison Expressions: + https://docs.oasis-open.org/cti/stix/v2.1/os/stix-v2.1-os.html#_boiciucr9smf + + comparison is a tuple obtained from stix2patterns.v21.pattern.Pattern(pattern).inspect().comparisons, + e.g. (['value'], '=', "'http://example.org'"), (['value'], '=', "'127.0.0.1/32'") + """ + if len(comparison) != 3: + if logger: + logger.warning('Unexpected Comparison Expressions. Expression: {}'.format(comparison)) + return + + property_name, operator, value = comparison + supported_property_names = [['value'], + ['hashes', 'MD5'], + ['hashes', 'SHA-1'], + ['hashes', 'SHA-256'], + # Based on 10.7 Hashing Algorithm Vocabulary, these keys are not valid, but they are used in some feeds (e.g. 
ETI) + # https://docs.oasis-open.org/cti/stix/v2.1/os/stix-v2.1-os.html#_ths0b11wzxv3 + ['hashes', 'SHA1'], + ['hashes', 'SHA256'] + ] + if not (property_name in supported_property_names) or (operator != '=') or not value.startswith("'") or not value.endswith("'"): + if logger: + logger.info('Unsupported Comparison Expression. Only Comparison Expressions with "equal" comparison operator and "value" or "hashes" property are supported. Expression: {}'.format(comparison)) + return + + # remove single quotes from returned value + return value[1:-1] + + @staticmethod + def parse_stix_pattern(pattern, logger=None): """ STIX Patterning: https://docs.oasis-open.org/cti/stix/v2.1/os/stix-v2.1-os.html#_e8slinrhxcc9 """ - if pattern.count('[') != 1: - print('Unsupported Pattern Expression. Only single Observation Expression is supported. Pattern: {}'.format(pattern)) - return - value = pattern.split("'")[1] - if pattern.startswith('[url:value = '): - return ('source.url', value) - if pattern.startswith('[domain-name:value = '): - return ('source.fqdn', value) - if pattern.startswith('[ipv4-addr:value = '): - # remove port, sometimes the port is present in ETI - value = value.split(':')[0] - # strip CIDR if IPv4 network contains single host only - value = value[:-3] if value.endswith('/32') else value - # check if pattern is in CIDR notation - if value.rfind('/') > -1: - return ('source.network', value) - else: - return ('source.ip', value) - if pattern.startswith('[ipv6-addr:value = '): - # strip CIDR if IPv6 network contains single host only - value = value[:-4] if value.endswith('/128') else value - # check if pattern is in CIDR notation - if value.rfind('/') > -1: - return ('source.network', value) - else: - return ('source.ip', value) + indicators = [] + comparisons = stix2_pattern.Pattern(pattern).inspect().comparisons + for key in comparisons.keys(): + comparison_expressions = comparisons.get(key, []) + for comparison in comparison_expressions: + value = 
StixParserBot._get_value_from_comparison_expression(comparison, logger) + if not value: + pass + if key == 'url': + indicators.append(('source.url', value)) + elif key == 'domain-name': + indicators.append(('source.fqdn', value)) + elif key == 'ipv4-addr': + # remove port, sometimes the port is present in ETI + value = value.split(':')[0] + # strip CIDR if IPv4 network contains single host only + value = value[:-3] if value.endswith('/32') else value + # check if pattern is in CIDR notation + if value.rfind('/') > -1: + indicators.append(('source.network', value)) + else: + indicators.append(('source.ip', value)) + elif key == 'ipv6-addr': + # strip CIDR if IPv6 network contains single host only + value = value[:-4] if value.endswith('/128') else value + # check if pattern is in CIDR notation + if value.rfind('/') > -1: + indicators.append(('source.network', value)) + else: + indicators.append(('source.ip', value)) + elif key == 'file': + if len(comparison) == 3 and len(comparison[0]) == 2 and comparison[0][0] == 'hashes': + # converts MD5, SHA-1, SHA1, SHA-256, SHA256 to md5, sha1, sha256 used in IntelMQ + hash_algo = comparison[0][1].lower().replace('-', '') + indicators.append(('malware.hash.' + hash_algo, value)) + else: + if logger: + logger.warning('Unsupported Object Type "{}" in Pattern Expression. Pattern: {}'.format(key, pattern)) + + return indicators BOT = StixParserBot diff --git a/intelmq/tests/bots/parsers/stix/test_parser_bot.py b/intelmq/tests/bots/parsers/stix/test_parser_bot.py index 4e0695cd14..628052cec7 100644 --- a/intelmq/tests/bots/parsers/stix/test_parser_bot.py +++ b/intelmq/tests/bots/parsers/stix/test_parser_bot.py @@ -40,6 +40,7 @@ } +@test.skip_exotic() class TestStixParserBot(test.BotTestCase, unittest.TestCase): """ A TestCase for a StixParserBot. @@ -58,52 +59,94 @@ def test_event(self): def test_pattern_url(self): """ Test if url pattern is parsed. 
""" - indicator = self.bot_reference.parse_stix_pattern("[url:value = 'http://example.org']") + indicator = self.bot_reference.parse_stix_pattern("[url:value = 'http://example.org']")[0] self.assertEqual(str(indicator[0]), 'source.url') self.assertEqual(str(indicator[1]), 'http://example.org') - def test_pattern_url(self): + def test_pattern_domain(self): """ Test if domain pattern is parsed. """ - indicator = self.bot_reference.parse_stix_pattern("[domain-name:value = 'example.org']") + indicator = self.bot_reference.parse_stix_pattern("[domain-name:value = 'example.org']")[0] self.assertEqual(str(indicator[0]), 'source.fqdn') self.assertEqual(str(indicator[1]), 'example.org') def test_pattern_ipv4(self): """ Test if ipv4 pattern is parsed. """ - indicator = self.bot_reference.parse_stix_pattern("[ipv4-addr:value = '127.0.0.1']") + indicator = self.bot_reference.parse_stix_pattern("[ipv4-addr:value = '127.0.0.1']")[0] self.assertEqual(str(indicator[0]), 'source.ip') self.assertEqual(str(indicator[1]), '127.0.0.1') def test_pattern_ipv4_cidr(self): """ Test if ipv4 cidr pattern is parsed. """ - indicator = self.bot_reference.parse_stix_pattern("[ipv4-addr:value = '127.0.0.0/8']") + indicator = self.bot_reference.parse_stix_pattern("[ipv4-addr:value = '127.0.0.0/8']")[0] self.assertEqual(str(indicator[0]), 'source.network') self.assertEqual(str(indicator[1]), '127.0.0.0/8') def test_pattern_ipv4_cidr_single_host(self): """ Test if ipv4 cidr with single host pattern is parsed. """ - indicator = self.bot_reference.parse_stix_pattern("[ipv4-addr:value = '127.0.0.1/32']") + indicator = self.bot_reference.parse_stix_pattern("[ipv4-addr:value = '127.0.0.1/32']")[0] self.assertEqual(str(indicator[0]), 'source.ip') self.assertEqual(str(indicator[1]), '127.0.0.1') def test_pattern_ipv6(self): """ Test if ipv6 pattern is parsed. 
""" - indicator = self.bot_reference.parse_stix_pattern("[ipv6-addr:value = '::1']") + indicator = self.bot_reference.parse_stix_pattern("[ipv6-addr:value = '::1']")[0] self.assertEqual(str(indicator[0]), 'source.ip') self.assertEqual(str(indicator[1]), '::1') def test_pattern_ipv6_cidr(self): """ Test if ipv6 cidr pattern is parsed. """ - indicator = self.bot_reference.parse_stix_pattern("[ipv6-addr:value = 'fe:80::/10']") + indicator = self.bot_reference.parse_stix_pattern("[ipv6-addr:value = 'fe:80::/10']")[0] self.assertEqual(str(indicator[0]), 'source.network') self.assertEqual(str(indicator[1]), 'fe:80::/10') def test_pattern_ipv6_cidr_single_host(self): """ Test if ipv6 cidr with single host pattern is parsed. """ - indicator = self.bot_reference.parse_stix_pattern("[ipv6-addr:value = 'fe:80::1/128']") + indicator = self.bot_reference.parse_stix_pattern("[ipv6-addr:value = 'fe:80::1/128']")[0] self.assertEqual(str(indicator[0]), 'source.ip') self.assertEqual(str(indicator[1]), 'fe:80::1') + def test_pattern_hash_md5(self): + """ Test if domain pattern is parsed. """ + indicator = self.bot_reference.parse_stix_pattern("[file:hashes.MD5 = '44d88612fea8a8f36de82e1278abb02f']")[0] + self.assertEqual(str(indicator[0]), 'malware.hash.md5') + self.assertEqual(str(indicator[1]), '44d88612fea8a8f36de82e1278abb02f') + + def test_pattern_hash_sha1(self): + """ Test if domain pattern is parsed. """ + indicator = self.bot_reference.parse_stix_pattern("[file:hashes.'SHA-1' = '3395856ce81f2b7382dee72602f798b642f14140']")[0] + self.assertEqual(str(indicator[0]), 'malware.hash.sha1') + self.assertEqual(str(indicator[1]), '3395856ce81f2b7382dee72602f798b642f14140') + + # Based on 10.7 Hashing Algorithm Vocabulary, keys SHA1 and SHA256 are not valid, but they are used in some feeds (e.g. 
ETI) + # https://docs.oasis-open.org/cti/stix/v2.1/os/stix-v2.1-os.html#_ths0b11wzxv3 + indicator = self.bot_reference.parse_stix_pattern("[file:hashes.SHA1 = '3395856ce81f2b7382dee72602f798b642f14140']")[0] + self.assertEqual(str(indicator[0]), 'malware.hash.sha1') + self.assertEqual(str(indicator[1]), '3395856ce81f2b7382dee72602f798b642f14140') + + def test_pattern_hash_sha256(self): + """ Test if domain pattern is parsed. """ + indicator = self.bot_reference.parse_stix_pattern("[file:hashes.'SHA-256' = '275a021bbfb6489e54d471899f7db9d1663fc695ec2fe2a2c4538aabf651fd0f']")[0] + self.assertEqual(str(indicator[0]), 'malware.hash.sha256') + self.assertEqual(str(indicator[1]), '275a021bbfb6489e54d471899f7db9d1663fc695ec2fe2a2c4538aabf651fd0f') + + # Based on 10.7 Hashing Algorithm Vocabulary, keys SHA1 and SHA256 are not valid, but they are used in some feeds (e.g. ETI) + # https://docs.oasis-open.org/cti/stix/v2.1/os/stix-v2.1-os.html#_ths0b11wzxv3 + indicator = self.bot_reference.parse_stix_pattern("[file:hashes.SHA256 = '275a021bbfb6489e54d471899f7db9d1663fc695ec2fe2a2c4538aabf651fd0f']")[0] + self.assertEqual(str(indicator[0]), 'malware.hash.sha256') + self.assertEqual(str(indicator[1]), '275a021bbfb6489e54d471899f7db9d1663fc695ec2fe2a2c4538aabf651fd0f') + + def test_complex_pattern1(self): + """ Test if complex pattern is parsed. """ + indicators = self.bot_reference.parse_stix_pattern("[url:value = 'http://example.org' AND ipv4-addr:value = '127.0.0.1/32']") + self.assertEqual(('source.url', 'http://example.org') in indicators, True) + self.assertEqual(('source.ip', '127.0.0.1') in indicators, True) + + def test_complex_pattern2(self): + """ Test if complex pattern is parsed. 
""" + indicators = self.bot_reference.parse_stix_pattern("[url:value = 'http://example.org'] AND [ipv4-addr:value = '127.0.0.1/32']") + self.assertEqual(('source.url', 'http://example.org') in indicators, True) + self.assertEqual(('source.ip', '127.0.0.1') in indicators, True) + if __name__ == '__main__': # pragma: no cover unittest.main() From a9b07cc3e48192d25c40862b15b0fdaa32eb0945 Mon Sep 17 00:00:00 2001 From: Ladislav Baco Date: Thu, 29 May 2025 01:51:37 +0200 Subject: [PATCH 5/7] Add TAXII and STIX bots documentation --- docs/user/bots.md | 76 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/docs/user/bots.md b/docs/user/bots.md index 2fcba0ac30..6c6db892be 100644 --- a/docs/user/bots.md +++ b/docs/user/bots.md @@ -1276,6 +1276,40 @@ Also, you will need to know an appropriate STOMP *destination* (aka (optional, string) Password to use. +--- + +### TAXII
+ +Collects indicator objects from a TAXII server. + +**Module:** `intelmq.bots.collectors.taxii.collector` + +**Requirements** + +Install the `taxii2-client` module: + +```bash +pip3 install -r intelmq/bots/collectors/taxii/REQUIREMENTS.txt +``` + +**Parameters (also expects [feed parameters](#feed-parameters)):** + +**`username`** + +(required, string) TAXII username. + +**`password`** + +(required, string) TAXII password. + +**`collection`** + +(required, string) The URL of the collection to fetch. + +**`time_delta`** + +(optional, integer) The time span (in seconds) to look back. Defaults to 3600. + ## Parser Bots If not set differently during parsing, all parser bots copy the following fields from the report to an event: @@ -2238,6 +2272,48 @@ No additional parameters. --- +### STIX
+ +Parses indicator objects in STIX format received by the TAXII collector. + +**Module:** `intelmq.bots.parsers.stix.parser` + +**Requirements** + +Install the `stix2-patterns` module: + +```bash +pip3 install -r intelmq/bots/parsers/stix/REQUIREMENTS.txt +``` + +No additional parameters. + +--- + +### STIX
+ +Parses ESET Threat Intelligence feeds. + +This bot parses indicator objects in STIX format received by the TAXII collector +from the ESET Threat Intelligence TAXII server. +It then analyzes the event's comment, which is taken from the STIX indicator's description, +and adds `classification.type` and malware family information. +It is recommended to apply the TaxonomyExpertBot afterwards to map the taxonomy. + +**Module:** `intelmq.bots.parsers.stix.parser_eset` + +**Requirements** + +Install the `stix2-patterns` module: + +```bash +pip3 install -r intelmq/bots/parsers/stix/REQUIREMENTS.txt +``` + +No additional parameters. + +--- + ### Surbl
Parses data from surbl feed. From c43b644ae5b2f1bfdc59bc40b447e74171335185 Mon Sep 17 00:00:00 2001 From: Ladislav Baco Date: Thu, 29 May 2025 09:17:36 +0200 Subject: [PATCH 6/7] Fix missing dependency error --- intelmq/bots/parsers/stix/parser.py | 1 + intelmq/tests/bots/parsers/stix/test_parser_eset_bot.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/intelmq/bots/parsers/stix/parser.py b/intelmq/bots/parsers/stix/parser.py index cda8e3563d..1234e02caa 100644 --- a/intelmq/bots/parsers/stix/parser.py +++ b/intelmq/bots/parsers/stix/parser.py @@ -9,6 +9,7 @@ from intelmq.lib.bot import ParserBot +from intelmq.lib.exceptions import MissingDependencyError try: import stix2patterns.v21.pattern as stix2_pattern diff --git a/intelmq/tests/bots/parsers/stix/test_parser_eset_bot.py b/intelmq/tests/bots/parsers/stix/test_parser_eset_bot.py index d40f07a3f4..a48d6ff851 100644 --- a/intelmq/tests/bots/parsers/stix/test_parser_eset_bot.py +++ b/intelmq/tests/bots/parsers/stix/test_parser_eset_bot.py @@ -42,7 +42,7 @@ 'raw': 'eyJpZCI6ICJpbmRpY2F0b3ItLTAiLCAidHlwZSI6ICJpbmRpY2F0b3IiLCAic3BlY192ZXJzaW9uIjogIjIuMSIsICJjcmVhdGVkIjogIjE5NzAtMDEtMDFUMDA6MDA6MDAuMDAwWiIsICJtb2RpZmllZCI6ICIxOTcwLTAxLTAxVDAwOjAwOjAwLjAwMFoiLCAicGF0dGVybiI6ICJbdXJsOnZhbHVlID0gJ2h0dHA6Ly9leGFtcGxlLm9yZyddIiwgInBhdHRlcm5fdHlwZSI6ICJzdGl4IiwgInZhbGlkX2Zyb20iOiAiMTk3MC0wMS0wMVQwMDowMDowMFoiLCAiZGVzY3JpcHRpb24iOiAiQyZDIGluZGljYXRlcyB0aGF0IGEgYm90bmV0IFdpbjMyL1NweS5MdW1tYVN0ZWFsZXIuQiB0cm9qYW4gaXMgcHJlc2VudC4iLCAibGFiZWxzIjogWyJtYWxpY2lvdXMtYWN0aXZpdHkiXX0=' } - +@test.skip_exotic() class TestESETStixParserBot(test.BotTestCase, unittest.TestCase): """ A TestCase for an ESETStixParserBot. 
From 23827214de991ab0253d6c457050c33351089da8 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Fri, 18 Jul 2025 12:32:43 +0100 Subject: [PATCH 7/7] doc: add changelog entry for PR#2611 --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f4f4cada70..12532f80d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,9 +25,12 @@ Please refer to the [NEWS](NEWS.md) for a list of changes which have an affect o ### Bots #### Collectors +- `intelmq.bots.collectors.taxii.collector`: Added new bot to collect data from TAXII servers (PR#2611 by Ladislav Baco). #### Parsers - `intelmq.bots.parsers.cymru.parser_cap_program`: Add mapping for TOR and ipv6-icmp protocol (PR#2621 by Mikk Margus Möll). +- `intelmq.bots.parsers.stix.parser`: Added new bot to parse STIX data (PR#2611 by Ladislav Baco). +- `intelmq.bots.parsers.stix.parser_eset`: Added new bot to parse STIX data from ESET (PR#2611 by Ladislav Baco). #### Experts - `intelmq.bots.experts.asn_lookup.expert`: