diff --git a/CHANGELOG.md b/CHANGELOG.md index f4f4cada70..12532f80d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,9 +25,12 @@ Please refer to the [NEWS](NEWS.md) for a list of changes which have an affect o ### Bots #### Collectors +- `intelmq.bots.collectors.taxii.collector`: Added new bot to collect data from TAXII servers (PR#2611 by Ladislav Baco). #### Parsers - `intelmq.bots.parsers.cymru.parser_cap_program`: Add mapping for TOR and ipv6-icmp protocol (PR#2621 by Mikk Margus Möll). +- `intelmq.bots.parsers.stix.parser`: Added new bot to parse STIX data (PR#2611 by Ladislav Baco). +- `intelmq.bots.parsers.stix.parser_eset`: Added new bot to parse STIX data from ESET (PR#2611 by Ladislav Baco). #### Experts - `intelmq.bots.experts.asn_lookup.expert`: diff --git a/docs/user/bots.md b/docs/user/bots.md index 2fcba0ac30..6c6db892be 100644 --- a/docs/user/bots.md +++ b/docs/user/bots.md @@ -1276,6 +1276,40 @@ Also, you will need to know an appropriate STOMP *destination* (aka (optional, string) Password to use. +--- + +### TAXII
+ +Collects indicator objects from a TAXII server. + +**Module:** `intelmq.bots.collectors.taxii.collector` + +**Requirements** + +Install the `taxii2-client` module: + +```bash +pip3 install -r intelmq/bots/collectors/taxii/REQUIREMENTS.txt +``` + +**Parameters (also expects [feed parameters](#feed-parameters)):** + +**`username`** + +(required, string) TAXII username. + +**`password`** + +(required, string) TAXII password. + +**`collection`** + +(required, string) The URL of the collection to fetch. + +**`time_delta`** + +(optional, integer) The time span (in seconds) to look back. Defaults to 3600. + ## Parser Bots If not set differently during parsing, all parser bots copy the following fields from the report to an event: @@ -2238,6 +2272,48 @@ No additional parameters. --- +### STIX
+ +Parses indicator objects in STIX format received by the TAXII collector. + +**Module:** `intelmq.bots.parsers.stix.parser` + +**Requirements** + +Install the `stix2-patterns` module: + +```bash +pip3 install -r intelmq/bots/parsers/stix/REQUIREMENTS.txt +``` + +No additional parameters. + +--- + +### STIX (ESET)
+ +Parses ESET Threat Intelligence feeds. + +This bot parses indicator objects in STIX format received by the TAXII collector +from the ESET Threat Intelligence TAXII server. +It then analyzes the event's comment, which is taken from the STIX indicator's description, +and adds `classification.type` and malware family information. +It is recommended to apply TaxonomyExpertBot afterwards to map the taxonomy. + +**Module:** `intelmq.bots.parsers.stix.parser_eset` + +**Requirements** + +Install the `stix2-patterns` module: + +```bash +pip3 install -r intelmq/bots/parsers/stix/REQUIREMENTS.txt +``` + +No additional parameters. + +--- + ### Surbl
class TaxiiCollectorBot(CollectorBot):
    """Collect STIX indicator objects from a TAXII collection.

    Every object returned by the server is sent as one report whose ``raw``
    field holds the JSON-serialized object.

    Configuration parameters:
        collection: URL of the TAXII collection to fetch (required).
        username: TAXII username (required).
        password: TAXII password (required).
        time_delta: Length of the look-back window in seconds. Only objects
            added to the collection inside this window are requested.
    """
    collection: str = None
    username: str = None
    password: str = None
    rate_limit: int = 3600
    time_delta: int = 3600

    def init(self):
        """Validate the configuration and create the TAXII collection client.

        Raises:
            MissingDependencyError: If the ``taxii2-client`` package is missing.
            ValueError: If ``collection``, ``username`` or ``password`` is unset.
        """
        if taxii2 is None:
            raise MissingDependencyError('taxii2-client')

        if self.collection is None:
            raise ValueError('No TAXII collection URL provided.')
        if self.username is None:
            raise ValueError('No TAXII username provided.')
        if self.password is None:
            raise ValueError('No TAXII password provided.')

        # Kept for backward compatibility; process() refreshes it on every run.
        self._date_after = self._lookback_start()

        self._taxii_collection = taxii2.Collection(self.collection, user=self.username, password=self.password)

    def _lookback_start(self):
        """Return the timezone-aware UTC instant ``time_delta`` seconds before now."""
        # An aware UTC value is unambiguous regardless of the host's time zone.
        # NOTE(review): taxii2-client appears to interpret naive datetimes as UTC,
        # which would shift the window on non-UTC hosts - confirm against its docs.
        now = datetime.datetime.now(datetime.timezone.utc)
        return now - datetime.timedelta(seconds=int(self.time_delta))

    def process(self):
        """Fetch indicator objects added within the look-back window and send them as reports.

        HTTP errors raised while talking to the server are logged and the run
        ends without sending further reports.
        """
        # Recompute the window on every run. A value frozen at start-up would make
        # each later run re-fetch everything added since the bot was started.
        self._date_after = self._lookback_start()
        try:
            title = self._taxii_collection.title
            self.logger.info('Collection title: %r.', title)

            # get the indicator objects
            objects = self._taxii_collection.get_objects(added_after=self._date_after, type='indicator').get('objects', [])
            for obj in objects:
                report = self.new_report()
                report.add('raw', json.dumps(obj))
                report.add('feed.url', self.collection)
                report.add('feed.code', title)
                self.send_message(report)

        except HTTPError as e:
            self.logger.error('Connection error: %r!', e)
def parse_vendor_specific(self, event, line, report):
    """
    Hook for vendor-specific enrichment of an event built from a STIX 2.1 Indicator object.

    The base implementation deliberately does nothing. A vendor-specific STIX parser
    can inherit from this class and override only this method to adjust the event
    (for example its classification) from the indicator in `line`.
    """
    return None
@staticmethod
def parse_stix_pattern(pattern, logger=None):
    """
    Extract IntelMQ indicators from a STIX pattern expression.

    STIX Patterning:
    https://docs.oasis-open.org/cti/stix/v2.1/os/stix-v2.1-os.html#_e8slinrhxcc9

    Parameters:
        pattern: the STIX pattern string, e.g. "[url:value = 'http://example.org']"
        logger: optional logger used to report unsupported expressions

    Returns:
        list of (IntelMQ field name, value) tuples; comparisons that are not
        supported contribute nothing to the list.
    """
    supported_object_types = ('url', 'domain-name', 'ipv4-addr', 'ipv6-addr', 'file')

    indicators = []
    comparisons = stix2_pattern.Pattern(pattern).inspect().comparisons
    for key, comparison_expressions in comparisons.items():
        for comparison in comparison_expressions:
            value = StixParserBot._get_value_from_comparison_expression(comparison, logger)

            if key not in supported_object_types:
                if logger:
                    logger.warning('Unsupported Object Type "{}" in Pattern Expression. Pattern: {}'.format(key, pattern))
                continue

            if not value:
                # Unsupported comparison (already reported by the helper): skip it.
                # Falling through would emit (field, None) indicators and crash on
                # the string operations used for IP addresses below.
                continue

            if key == 'url':
                indicators.append(('source.url', value))
            elif key == 'domain-name':
                indicators.append(('source.fqdn', value))
            elif key == 'ipv4-addr':
                # remove port, sometimes the port is present in ETI
                value = value.split(':')[0]
                # strip CIDR if IPv4 network contains single host only
                value = value[:-3] if value.endswith('/32') else value
                # check if pattern is in CIDR notation
                if value.rfind('/') > -1:
                    indicators.append(('source.network', value))
                else:
                    indicators.append(('source.ip', value))
            elif key == 'ipv6-addr':
                # strip CIDR if IPv6 network contains single host only
                value = value[:-4] if value.endswith('/128') else value
                # check if pattern is in CIDR notation
                if value.rfind('/') > -1:
                    indicators.append(('source.network', value))
                else:
                    indicators.append(('source.ip', value))
            elif key == 'file':
                if len(comparison) == 3 and len(comparison[0]) == 2 and comparison[0][0] == 'hashes':
                    # converts MD5, SHA-1, SHA1, SHA-256, SHA256 to md5, sha1, sha256 used in IntelMQ
                    hash_algo = comparison[0][1].lower().replace('-', '')
                    indicators.append(('malware.hash.' + hash_algo, value))

    return indicators
class ESETStixParserBot(StixParserBot):
    """Derive classification.type and the malware family from the event comment"""

    # Detection names follow Platform/Type.Family.Variant!Suffixes
    # where Type and Suffixes are optional
    _malware_naming_convention_pattern = re.compile(r'^([^/]*/)?([^\.]*\.)?([^\.]+)(\.[^!]*)(!.*)?$')

    def parse_vendor_specific(self, event, line, report):
        """ Classify the event by its comment unless it already has a determined classification.type """
        if event.get('classification.type', 'undetermined') != 'undetermined':
            # classification.type already present, do not change it
            return

        resolved_type, family = self.classify(event.get('comment', ''))
        event.add('classification.type', resolved_type, overwrite=True)
        if family:
            event.add('malware.name', family)

    @staticmethod
    def classify(comment):
        """ Map a comment to a (classification_type, malware_name) tuple; malware_name may be None """
        # exact, well-known descriptions carry no malware name
        exact_type = CLASSIFICATION_BY_STRING.get(comment, None)
        if exact_type:
            return (exact_type, None)

        # otherwise the first matching regular expression decides
        for compiled, regex_type in CLASSIFICATION_BY_REGEX.items():
            hit = compiled.match(comment)
            if not hit:
                continue
            captured = hit.groups()
            family = None
            if len(captured) > 0:
                family = ESETStixParserBot.extract_malware_family(captured[0])
            return (regex_type, family)

        return ('undetermined', None)

    @staticmethod
    def extract_malware_family(malware):
        """ Return the lowercase malware family taken from a threat detection string """

        parsed = ESETStixParserBot._malware_naming_convention_pattern.match(malware)
        if parsed and len(parsed.groups()) == 5:
            family = parsed.group(3)
        else:
            # usually just malware family (or unknown naming convention)
            family = malware

        # IntelMQ malware.name should be lowercase
        return family.lower()
of Compromise associated with APT groups' attacks. + additional_information: + documentation: https://help.eset.com/eti_portal/en-US/apt-feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/97e3eb74ae5f46dd9e22f677a6938ee7/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: + Botnet: + description: Data from automated botnet tracking system. Indicators of Compromise include C&Cs (URLs) and MD5, SHA-1, SHA-256 (currently not implemented in TStixParserBot). + additional_information: + documentation: https://help.eset.com/eti_portal/en-US/botnet-feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/0abb06690b0b47e49cd7794396b76b20/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: + Botnet C&C: + description: Subset of a Botnet feed, provides information about URLs of Command and Control (C&C) servers and associated data. + additional_information: + documentation: https://help.eset.com/eti_portal/en-US/cc-feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/d1923a526e8f400dbb301259240ee3d5/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: + Botnet Target: + description: Subset of a Botnet feed, provides information about the targets. 
+ additional_information: + documentation: https://help.eset.com/eti_portal/en-US/target-feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/61b6e4f9153e411ca7a9982a2c6ae788/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: + Cryptoscam: + description: Subset of scam domains and URLs that contain targeted information about the current and prevalent crypto scam domains, URLs, and associated data. + additional_information: + documentation: https://help.eset.com/eti_portal/en-US/cryptoscam_feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/2c183ce9551a43338c6cc2ed7c2a704d/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: + Domain: + description: The feed covers the domain name, the data associated with it, and respective malicious activity. + additional_information: + documentation: https://help.eset.com/eti_portal/en-US/domain-feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/a34aa0a4f9de419582a883863503f9c4/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: + IP: + description: Current and prevalent malicious and abusive IPs and some data associated with them. 
+ additional_information: + documentation: https://help.eset.com/eti_portal/en-US/ip-feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/baaed2a92335418aa753fe944e13c23a/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: + Phishing URL: + description: Phishing URLs direct recipients to fake websites and attempt to entice them into divulging sensitive data such as login credentials or financial information. + additional_information: + documentation: https://help.eset.com/eti_portal/en-US/phishing_url_feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/d0a6c0f962dd4dd2b3eeb96b18612584/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: + Scam URL: + description: This feed covers fraudulent electronic shops, investment scams, dating scams. + additional_information: + documentation: https://help.eset.com/eti_portal/en-US/scam_url_feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/2130adc3c67c43f9a3664b187931375e/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: + Smishing: + description: The Smishing feed works exactly the same as the SMS Scam feed except that the fraudulent activity utilizes smishing. 
+ additional_information: + documentation: https://help.eset.com/eti_portal/en-US/smishing_feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/330ad7d0c736476babe5e49077b96c95/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: + SMS scam: + description: This feed contains targeted information about the current and prevalent SMS scam domains, URLs, and associated data. + additional_information: + documentation: https://help.eset.com/eti_portal/en-US/sms_scam_feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/6e20217a2e1246b8ab11be29f759f716/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: + URL: + description: URL feed provides information about current and prevalent malicious URLs and associated data. The feed is created from all URL sources every five minutes, deduplication happens every 24 hours. + additional_information: + documentation: https://help.eset.com/eti_portal/en-US/url-feed.html + revision: 2025-05-01 + public: false + bots: + collector: + module: intelmq.bots.collectors.taxii.collector + parameters: + name: __FEED__ + provider: __PROVIDER__ + username: + password: + collection: https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/1d3208c143be49da8130f5a66fd3a0fa/ + time_delta: 3600 + parser: + module: intelmq.bots.parsers.stix.parser_eset + parameters: Shodan: Country Stream: description: Collects the Shodan stream for one or multiple countries from the Shodan API. 
diff --git a/intelmq/tests/bots/collectors/taxii/test_collector_bot.py b/intelmq/tests/bots/collectors/taxii/test_collector_bot.py new file mode 100644 index 0000000000..52b4df0503 --- /dev/null +++ b/intelmq/tests/bots/collectors/taxii/test_collector_bot.py @@ -0,0 +1,108 @@ +# SPDX-FileCopyrightText: 2016 Sebastian Wagner, 2025 Ladislav Baco +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- +""" +Test with reports, based on intelmq/tests/lib/test_collector_bot.py +""" +import unittest + +import re +import requests_mock + +import intelmq.lib.bot as bot +import intelmq.lib.test as test +from intelmq.bots.collectors.taxii.collector import TaxiiCollectorBot + + +EXAMPLE_REPORT = {'__type': 'Report', + 'feed.name': 'Taxii Feed', + 'feed.code': 'feed stix2.1', + 'feed.provider': 'Taxii Provider', + 'feed.documentation': 'Taxii Documentation', + 'feed.accuracy': 100.0, + 'feed.url': 'http://localhost/feed', + 'raw': 'eyJpZCI6ICJpbmRpY2F0b3ItLTAiLCAidHlwZSI6ICJpbmRpY2F0b3IiLCAic3BlY192ZXJzaW9uIjogIjIuMSIsICJjcmVhdGVkIjogIjE5NzAtMDEtMDFUMDA6MDA6MDAuMDAwWiIsICJtb2RpZmllZCI6ICIxOTcwLTAxLTAxVDAwOjAwOjAwLjAwMFoiLCAicGF0dGVybiI6ICJbdXJsOnZhbHVlID0gJ2h0dHA6Ly9leGFtcGxlLm9yZyddIiwgInBhdHRlcm5fdHlwZSI6ICJzdGl4IiwgInZhbGlkX2Zyb20iOiAiMTk3MC0wMS0wMVQwMDowMDowMFoifQ==' + } + +def prepare_mocker(mocker): + mocker.get( + 'http://localhost/feed/', + json={ + 'id': 'feed', + 'title': 'feed stix2.1', + 'can_read': True, + 'can_write': False + }, + headers={'Content-Type': 'application/taxii+json;version=2.1'} + ) + mocker.get( + re.compile('http://localhost/feed/objects/.*'), + json={ + 'id': 'feed', + 'title': 'feed stix2.1', + 'can_read': True, + 'can_write': False, + 'more': False, + 'objects': [{ + 'id': 'indicator--0', + 'type': 'indicator', + 'spec_version': '2.1', + 'created': '1970-01-01T00:00:00.000Z', + 'modified': '1970-01-01T00:00:00.000Z', + 'pattern': "[url:value = 'http://example.org']", + 'pattern_type': 'stix', + 'valid_from': 
@test.skip_exotic()
@requests_mock.Mocker()
class TestTaxiiCollectorBot(test.BotTestCase, unittest.TestCase):
    """
    Tests for the TaxiiCollectorBot: report creation and validation of required parameters.
    """

    @classmethod
    def set_bot(cls):
        cls.bot_reference = TaxiiCollectorBot
        cls.sysconfig = {
            'name': 'Taxii Feed',
            'provider': 'Taxii Provider',
            'documentation': 'Taxii Documentation',
            'collection': 'http://localhost/feed',
            'username': 'user',
            'password': 'pass',
        }

    def test_event(self, mocker):
        """ The mocked collection yields exactly the expected report. """
        prepare_mocker(mocker)
        self.run_bot()
        self.assertMessageEqual(0, EXAMPLE_REPORT)

    def test_missing_collection(self, mocker):
        """ A missing collection URL is rejected with a ValueError. """
        with self.assertRaises(ValueError) as raised:
            self.run_bot(parameters={'collection': None})
        self.assertEqual(str(raised.exception), 'No TAXII collection URL provided.')

    def test_missing_username(self, mocker):
        """ A missing username is rejected with a ValueError. """
        with self.assertRaises(ValueError) as raised:
            self.run_bot(parameters={'username': None})
        self.assertEqual(str(raised.exception), 'No TAXII username provided.')

    def test_missing_password(self, mocker):
        """ A missing password is rejected with a ValueError. """
        with self.assertRaises(ValueError) as raised:
            self.run_bot(parameters={'password': None})
        self.assertEqual(str(raised.exception), 'No TAXII password provided.')
'eyJpZCI6ICJpbmRpY2F0b3ItLTAiLCAidHlwZSI6ICJpbmRpY2F0b3IiLCAic3BlY192ZXJzaW9uIjogIjIuMSIsICJjcmVhdGVkIjogIjE5NzAtMDEtMDFUMDA6MDA6MDAuMDAwWiIsICJtb2RpZmllZCI6ICIxOTcwLTAxLTAxVDAwOjAwOjAwLjAwMFoiLCAicGF0dGVybiI6ICJbdXJsOnZhbHVlID0gJ2h0dHA6Ly9leGFtcGxlLm9yZyddIiwgInBhdHRlcm5fdHlwZSI6ICJzdGl4IiwgInZhbGlkX2Zyb20iOiAiMTk3MC0wMS0wMVQwMDowMDowMFoifQ==' + } + + +@test.skip_exotic() +class TestStixParserBot(test.BotTestCase, unittest.TestCase): + """ + A TestCase for a StixParserBot. + """ + + @classmethod + def set_bot(cls): + cls.bot_reference = StixParserBot + cls.sysconfig = {} + + def test_event(self): + """ Test if correct Event has been produced. """ + self.input_message = EXAMPLE_REPORT + self.run_bot() + self.assertMessageEqual(0, EXAMPLE_EVENT) + + def test_pattern_url(self): + """ Test if url pattern is parsed. """ + indicator = self.bot_reference.parse_stix_pattern("[url:value = 'http://example.org']")[0] + self.assertEqual(str(indicator[0]), 'source.url') + self.assertEqual(str(indicator[1]), 'http://example.org') + + def test_pattern_domain(self): + """ Test if domain pattern is parsed. """ + indicator = self.bot_reference.parse_stix_pattern("[domain-name:value = 'example.org']")[0] + self.assertEqual(str(indicator[0]), 'source.fqdn') + self.assertEqual(str(indicator[1]), 'example.org') + + def test_pattern_ipv4(self): + """ Test if ipv4 pattern is parsed. """ + indicator = self.bot_reference.parse_stix_pattern("[ipv4-addr:value = '127.0.0.1']")[0] + self.assertEqual(str(indicator[0]), 'source.ip') + self.assertEqual(str(indicator[1]), '127.0.0.1') + + def test_pattern_ipv4_cidr(self): + """ Test if ipv4 cidr pattern is parsed. """ + indicator = self.bot_reference.parse_stix_pattern("[ipv4-addr:value = '127.0.0.0/8']")[0] + self.assertEqual(str(indicator[0]), 'source.network') + self.assertEqual(str(indicator[1]), '127.0.0.0/8') + + def test_pattern_ipv4_cidr_single_host(self): + """ Test if ipv4 cidr with single host pattern is parsed. 
""" + indicator = self.bot_reference.parse_stix_pattern("[ipv4-addr:value = '127.0.0.1/32']")[0] + self.assertEqual(str(indicator[0]), 'source.ip') + self.assertEqual(str(indicator[1]), '127.0.0.1') + + def test_pattern_ipv6(self): + """ Test if ipv6 pattern is parsed. """ + indicator = self.bot_reference.parse_stix_pattern("[ipv6-addr:value = '::1']")[0] + self.assertEqual(str(indicator[0]), 'source.ip') + self.assertEqual(str(indicator[1]), '::1') + + def test_pattern_ipv6_cidr(self): + """ Test if ipv6 cidr pattern is parsed. """ + indicator = self.bot_reference.parse_stix_pattern("[ipv6-addr:value = 'fe:80::/10']")[0] + self.assertEqual(str(indicator[0]), 'source.network') + self.assertEqual(str(indicator[1]), 'fe:80::/10') + + def test_pattern_ipv6_cidr_single_host(self): + """ Test if ipv6 cidr with single host pattern is parsed. """ + indicator = self.bot_reference.parse_stix_pattern("[ipv6-addr:value = 'fe:80::1/128']")[0] + self.assertEqual(str(indicator[0]), 'source.ip') + self.assertEqual(str(indicator[1]), 'fe:80::1') + + def test_pattern_hash_md5(self): + """ Test if domain pattern is parsed. """ + indicator = self.bot_reference.parse_stix_pattern("[file:hashes.MD5 = '44d88612fea8a8f36de82e1278abb02f']")[0] + self.assertEqual(str(indicator[0]), 'malware.hash.md5') + self.assertEqual(str(indicator[1]), '44d88612fea8a8f36de82e1278abb02f') + + def test_pattern_hash_sha1(self): + """ Test if domain pattern is parsed. """ + indicator = self.bot_reference.parse_stix_pattern("[file:hashes.'SHA-1' = '3395856ce81f2b7382dee72602f798b642f14140']")[0] + self.assertEqual(str(indicator[0]), 'malware.hash.sha1') + self.assertEqual(str(indicator[1]), '3395856ce81f2b7382dee72602f798b642f14140') + + # Based on 10.7 Hashing Algorithm Vocabulary, keys SHA1 and SHA256 are not valid, but they are used in some feeds (e.g. 
ETI) + # https://docs.oasis-open.org/cti/stix/v2.1/os/stix-v2.1-os.html#_ths0b11wzxv3 + indicator = self.bot_reference.parse_stix_pattern("[file:hashes.SHA1 = '3395856ce81f2b7382dee72602f798b642f14140']")[0] + self.assertEqual(str(indicator[0]), 'malware.hash.sha1') + self.assertEqual(str(indicator[1]), '3395856ce81f2b7382dee72602f798b642f14140') + + def test_pattern_hash_sha256(self): + """ Test if sha256 hash pattern is parsed. """ + indicator = self.bot_reference.parse_stix_pattern("[file:hashes.'SHA-256' = '275a021bbfb6489e54d471899f7db9d1663fc695ec2fe2a2c4538aabf651fd0f']")[0] + self.assertEqual(str(indicator[0]), 'malware.hash.sha256') + self.assertEqual(str(indicator[1]), '275a021bbfb6489e54d471899f7db9d1663fc695ec2fe2a2c4538aabf651fd0f') + + # Based on 10.7 Hashing Algorithm Vocabulary, keys SHA1 and SHA256 are not valid, but they are used in some feeds (e.g. ETI) + # https://docs.oasis-open.org/cti/stix/v2.1/os/stix-v2.1-os.html#_ths0b11wzxv3 + indicator = self.bot_reference.parse_stix_pattern("[file:hashes.SHA256 = '275a021bbfb6489e54d471899f7db9d1663fc695ec2fe2a2c4538aabf651fd0f']")[0] + self.assertEqual(str(indicator[0]), 'malware.hash.sha256') + self.assertEqual(str(indicator[1]), '275a021bbfb6489e54d471899f7db9d1663fc695ec2fe2a2c4538aabf651fd0f') + + def test_complex_pattern1(self): + """ Test if complex pattern with AND inside a single observation expression is parsed. """ + indicators = self.bot_reference.parse_stix_pattern("[url:value = 'http://example.org' AND ipv4-addr:value = '127.0.0.1/32']") + self.assertIn(('source.url', 'http://example.org'), indicators) + self.assertIn(('source.ip', '127.0.0.1'), indicators) + + def test_complex_pattern2(self): + """ Test if complex pattern with AND between two observation expressions is parsed. 
""" + indicators = self.bot_reference.parse_stix_pattern("[url:value = 'http://example.org'] AND [ipv4-addr:value = '127.0.0.1/32']") + self.assertEqual(('source.url', 'http://example.org') in indicators, True) + self.assertEqual(('source.ip', '127.0.0.1') in indicators, True) + + +if __name__ == '__main__': # pragma: no cover + unittest.main() diff --git a/intelmq/tests/bots/parsers/stix/test_parser_eset_bot.py b/intelmq/tests/bots/parsers/stix/test_parser_eset_bot.py new file mode 100644 index 0000000000..a48d6ff851 --- /dev/null +++ b/intelmq/tests/bots/parsers/stix/test_parser_eset_bot.py @@ -0,0 +1,125 @@ +# SPDX-FileCopyrightText: 2025 Ladislav Baco +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- +""" +Test with example reports (STIX objects usually collected from TAXII server) +""" +import unittest + +import re +import requests_mock + +import intelmq.lib.bot as bot +import intelmq.lib.test as test +from intelmq.bots.parsers.stix.parser_eset import ESETStixParserBot + + +EXAMPLE_REPORT = {'__type': 'Report', + 'feed.name': 'Botnet feed', + 'feed.code': 'botnet stix 2.1', + 'feed.provider': 'ESET', + 'feed.documentation': 'https://help.eset.com/eti_portal/en-US/botnet-feed.', + 'feed.accuracy': 100.0, + 'feed.url': 'https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/0abb06690b0b47e49cd7794396b76b20/', + 'raw': 'eyJpZCI6ICJpbmRpY2F0b3ItLTAiLCAidHlwZSI6ICJpbmRpY2F0b3IiLCAic3BlY192ZXJzaW9uIjogIjIuMSIsICJjcmVhdGVkIjogIjE5NzAtMDEtMDFUMDA6MDA6MDAuMDAwWiIsICJtb2RpZmllZCI6ICIxOTcwLTAxLTAxVDAwOjAwOjAwLjAwMFoiLCAicGF0dGVybiI6ICJbdXJsOnZhbHVlID0gJ2h0dHA6Ly9leGFtcGxlLm9yZyddIiwgInBhdHRlcm5fdHlwZSI6ICJzdGl4IiwgInZhbGlkX2Zyb20iOiAiMTk3MC0wMS0wMVQwMDowMDowMFoiLCAiZGVzY3JpcHRpb24iOiAiQyZDIGluZGljYXRlcyB0aGF0IGEgYm90bmV0IFdpbjMyL1NweS5MdW1tYVN0ZWFsZXIuQiB0cm9qYW4gaXMgcHJlc2VudC4iLCAibGFiZWxzIjogWyJtYWxpY2lvdXMtYWN0aXZpdHkiXX0=' + } + +EXAMPLE_EVENT = {'__type': 'Event', + 'feed.name': 'Botnet feed', + 'feed.code': 'botnet 
stix 2.1', + 'feed.provider': 'ESET', + 'feed.documentation': 'https://help.eset.com/eti_portal/en-US/botnet-feed.', + 'feed.accuracy': 100.0, + 'feed.url': 'https://taxii.eset.com/taxii2/643f4eb5-f8b7-46a3-a606-6d61d5ce223a/collections/0abb06690b0b47e49cd7794396b76b20/', + 'source.url': 'http://example.org', + 'time.source': '1970-01-01T00:00:00+00:00', + 'classification.type': 'c2-server', + 'malware.name': 'lummastealer', + 'comment': 'C&C indicates that a botnet Win32/Spy.LummaStealer.B trojan is present.', + 'extra.labels': ['malicious-activity'], + 'raw': 'eyJpZCI6ICJpbmRpY2F0b3ItLTAiLCAidHlwZSI6ICJpbmRpY2F0b3IiLCAic3BlY192ZXJzaW9uIjogIjIuMSIsICJjcmVhdGVkIjogIjE5NzAtMDEtMDFUMDA6MDA6MDAuMDAwWiIsICJtb2RpZmllZCI6ICIxOTcwLTAxLTAxVDAwOjAwOjAwLjAwMFoiLCAicGF0dGVybiI6ICJbdXJsOnZhbHVlID0gJ2h0dHA6Ly9leGFtcGxlLm9yZyddIiwgInBhdHRlcm5fdHlwZSI6ICJzdGl4IiwgInZhbGlkX2Zyb20iOiAiMTk3MC0wMS0wMVQwMDowMDowMFoiLCAiZGVzY3JpcHRpb24iOiAiQyZDIGluZGljYXRlcyB0aGF0IGEgYm90bmV0IFdpbjMyL1NweS5MdW1tYVN0ZWFsZXIuQiB0cm9qYW4gaXMgcHJlc2VudC4iLCAibGFiZWxzIjogWyJtYWxpY2lvdXMtYWN0aXZpdHkiXX0=' + } + +@test.skip_exotic() +class TestESETStixParserBot(test.BotTestCase, unittest.TestCase): + """ + A TestCase for an ESETStixParserBot. + """ + + @classmethod + def set_bot(cls): + cls.bot_reference = ESETStixParserBot + cls.sysconfig = {} + + def test_event(self): + """ Test if correct Event has been produced. """ + self.input_message = EXAMPLE_REPORT + self.run_bot() + self.assertMessageEqual(0, EXAMPLE_EVENT) + + def test_classification_by_string(self): + """ Test if correct classification based on string is returned. 
""" + classification_type, malware_name = self.bot_reference.classify('Host actively distributes high-severity malicious content in the form of executable code.') + self.assertEqual(str(classification_type), 'malware-distribution') + self.assertEqual(malware_name, None) + + classification_type, malware_name = self.bot_reference.classify('Host is known source of phishing or other fraudulent content.') + self.assertEqual(str(classification_type), 'phishing') + self.assertEqual(malware_name, None) + + classification_type, malware_name = self.bot_reference.classify('Host is used as command and control server.') + self.assertEqual(str(classification_type), 'c2-server') + self.assertEqual(malware_name, None) + + classification_type, malware_name = self.bot_reference.classify('Web services scanning and attacks') + self.assertEqual(str(classification_type), 'scanner') + self.assertEqual(malware_name, None) + + classification_type, malware_name = self.bot_reference.classify('RDP bruteforce IP') + self.assertEqual(str(classification_type), 'brute-force') + self.assertEqual(malware_name, None) + + def test_classification_by_regex(self): + """ Test if correct classification based on regex is returned. 
""" + classification_type, malware_name = self.bot_reference.classify('C&C indicates that a botnet Win32/Spy.LummaStealer.B trojan is present.') + self.assertEqual(str(classification_type), 'c2-server') + self.assertEqual(str(malware_name), 'lummastealer') + + classification_type, malware_name = self.bot_reference.classify('C&C of Win32/Spy.LummaStealer.B trojan') + self.assertEqual(str(classification_type), 'c2-server') + self.assertEqual(str(malware_name), 'lummastealer') + + classification_type, malware_name = self.bot_reference.classify('Host is used as command and control server of Win32/Emotet.BN trojan malware family.') + self.assertEqual(str(classification_type), 'c2-server') + self.assertEqual(str(malware_name), 'emotet') + + classification_type, malware_name = self.bot_reference.classify('WizardNet backdoor.') + self.assertEqual(str(classification_type), 'malware') + self.assertEqual(str(malware_name), 'wizardnet') + + classification_type, malware_name = self.bot_reference.classify('Loader for Emotet') + self.assertEqual(str(classification_type), 'malware') + self.assertEqual(str(malware_name), 'emotet') + + def test_unknown_classification(self): + """ Test if undetermined classification is returned when comment contains something unexpected. """ + classification_type, malware_name = self.bot_reference.classify('Example of unexpected comment.') + self.assertEqual(str(classification_type), 'undetermined') + self.assertEqual(malware_name, None) + + def test_malware_family_name_extraction(self): + """ Test if correct malwae family name is extracted from the given malware string. 
""" + malware_name = self.bot_reference.extract_malware_family('Win32/Spy.LummaStealer.B') + self.assertEqual(str(malware_name), 'lummastealer') + + malware_name = self.bot_reference.extract_malware_family('Win32/Rescoms.B') + self.assertEqual(str(malware_name), 'rescoms') + + malware_name = self.bot_reference.extract_malware_family('Emotet') + self.assertEqual(str(malware_name), 'emotet') + + +if __name__ == '__main__': # pragma: no cover + unittest.main()