77
88import json
99
10+
1011from intelmq .lib .bot import ParserBot
1112
13+ try :
14+ import stix2patterns .v21 .pattern as stix2_pattern
15+ except ImportError :
16+ stix2_pattern = None
17+
1218
1319class StixParserBot (ParserBot ):
1420 """Parse STIX indicators"""
1521 parse = ParserBot .parse_json_stream
1622 recover_line = ParserBot .recover_line_json_stream
1723
24+ def init (self ):
25+ if stix2_pattern is None :
26+ raise MissingDependencyError ('stix2-patterns' )
27+
1828 def parse_line (self , line , report ):
1929 """ Parse one STIX object of indicator type """
2030 object_type = line .get ('type' , '' )
2131 if object_type == 'indicator' :
22- event = self .new_event (report )
23- event .add ('raw' , json .dumps (line ))
24- event .add ('comment' , line .get ('description' , '' ))
25- event .add ('extra.labels' , line .get ('labels' , None ))
26- event .add ('time.source' , line .get ('valid_from' , '1970-01-01T00:00:00Z' ))
27- # classification will be determined by expert bot specific for given TAXII collection
28- event .add ('classification.type' , 'undetermined' )
29-
3032 pattern = line .get ('pattern' , '' )
3133 # stix, pcre, sigma, snort, suricata, yara
3234 pattern_type = line .get ('pattern_type' , '' )
3335
3436 if pattern_type == 'stix' :
35- indicator = self .parse_stix_pattern (pattern )
36- if indicator :
37- event .add (indicator [0 ], indicator [1 ])
37+ indicators = StixParserBot .parse_stix_pattern (pattern , self .logger )
38+ for indicator_type , indicator_value in indicators :
39+ event = self .new_event (report )
40+ event .add ('raw' , json .dumps (line ))
41+ event .add ('comment' , line .get ('description' , '' ))
42+ event .add ('extra.labels' , line .get ('labels' , None ))
43+ event .add ('time.source' , line .get ('valid_from' , '1970-01-01T00:00:00Z' ))
44+
45+ # IP address may be passed in Domain feeds or Domain may be passed in URL feeds
46+ # It violates the STIX format, however, in some sources it happens (e.g. in ETI)
47+ # Drop such events without failures and exceptions which slowing down the processing
48+ event .add (indicator_type , indicator_value , raise_failure = False )
49+
50+ # classification can be overridden by vendor-specific parser below
51+ event .add ('classification.type' , 'undetermined' )
3852 self .parse_vendor_specific (event , line , report )
3953 yield event
4054 else :
@@ -51,38 +65,84 @@ def parse_vendor_specific(self, event, line, report):
5165 return
5266
5367 @staticmethod
54- def parse_stix_pattern (pattern ):
68+ def _get_value_from_comparison_expression (comparison , logger = None ):
69+ """
70+ STIX Comparison Expressions:
71+ https://docs.oasis-open.org/cti/stix/v2.1/os/stix-v2.1-os.html#_boiciucr9smf
72+
73+ comparison is a tuple obtained from stix2patterns.v21.pattern.Pattern(pattern).inspect().comparisons,
74+ e.g. (['value'], '=', "'http://example.org'"), (['value'], '=', "'127.0.0.1/32'")
75+ """
76+ if len (comparison ) != 3 :
77+ if logger :
78+ logger .warning ('Unexpected Comparison Expressions. Expression: {}' .format (comparison ))
79+ return
80+
81+ property_name , operator , value = comparison
82+ supported_property_names = [['value' ],
83+ ['hashes' , 'MD5' ],
84+ ['hashes' , 'SHA-1' ],
85+ ['hashes' , 'SHA-256' ],
86+ # Based on 10.7 Hashing Algorithm Vocabulary, these keys are not valid, but they are used in some feeds (e.g. ETI)
87+ # https://docs.oasis-open.org/cti/stix/v2.1/os/stix-v2.1-os.html#_ths0b11wzxv3
88+ ['hashes' , 'SHA1' ],
89+ ['hashes' , 'SHA256' ]
90+ ]
91+ if not (property_name in supported_property_names ) or (operator != '=' ) or not value .startswith ("'" ) or not value .endswith ("'" ):
92+ if logger :
93+ logger .info ('Unsupported Comparison Expression. Only Comparison Expressions with "equal" comparison operator and "value" or "hashes" property are supported. Expression: {}' .format (comparison ))
94+ return
95+
96+ # remove single quotes from returned value
97+ return value [1 :- 1 ]
98+
99+ @staticmethod
100+ def parse_stix_pattern (pattern , logger = None ):
55101 """
56102 STIX Patterning:
57103 https://docs.oasis-open.org/cti/stix/v2.1/os/stix-v2.1-os.html#_e8slinrhxcc9
58104 """
59- if pattern .count ('[' ) != 1 :
60- print ('Unsupported Pattern Expression. Only single Observation Expression is supported. Pattern: {}' .format (pattern ))
61- return
62105
63- value = pattern .split ("'" )[1 ]
64- if pattern .startswith ('[url:value = ' ):
65- return ('source.url' , value )
66- if pattern .startswith ('[domain-name:value = ' ):
67- return ('source.fqdn' , value )
68- if pattern .startswith ('[ipv4-addr:value = ' ):
69- # remove port, sometimes the port is present in ETI
70- value = value .split (':' )[0 ]
71- # strip CIDR if IPv4 network contains single host only
72- value = value [:- 3 ] if value .endswith ('/32' ) else value
73- # check if pattern is in CIDR notation
74- if value .rfind ('/' ) > - 1 :
75- return ('source.network' , value )
76- else :
77- return ('source.ip' , value )
78- if pattern .startswith ('[ipv6-addr:value = ' ):
79- # strip CIDR if IPv6 network contains single host only
80- value = value [:- 4 ] if value .endswith ('/128' ) else value
81- # check if pattern is in CIDR notation
82- if value .rfind ('/' ) > - 1 :
83- return ('source.network' , value )
84- else :
85- return ('source.ip' , value )
106+ indicators = []
107+ comparisons = stix2_pattern .Pattern (pattern ).inspect ().comparisons
108+ for key in comparisons .keys ():
109+ comparison_expressions = comparisons .get (key , [])
110+ for comparison in comparison_expressions :
111+ value = StixParserBot ._get_value_from_comparison_expression (comparison , logger )
112+ if not value :
113+ pass
114+ if key == 'url' :
115+ indicators .append (('source.url' , value ))
116+ elif key == 'domain-name' :
117+ indicators .append (('source.fqdn' , value ))
118+ elif key == 'ipv4-addr' :
119+ # remove port, sometimes the port is present in ETI
120+ value = value .split (':' )[0 ]
121+ # strip CIDR if IPv4 network contains single host only
122+ value = value [:- 3 ] if value .endswith ('/32' ) else value
123+ # check if pattern is in CIDR notation
124+ if value .rfind ('/' ) > - 1 :
125+ indicators .append (('source.network' , value ))
126+ else :
127+ indicators .append (('source.ip' , value ))
128+ elif key == 'ipv6-addr' :
129+ # strip CIDR if IPv6 network contains single host only
130+ value = value [:- 4 ] if value .endswith ('/128' ) else value
131+ # check if pattern is in CIDR notation
132+ if value .rfind ('/' ) > - 1 :
133+ indicators .append (('source.network' , value ))
134+ else :
135+ indicators .append (('source.ip' , value ))
136+ elif key == 'file' :
137+ if len (comparison ) == 3 and len (comparison [0 ]) == 2 and comparison [0 ][0 ] == 'hashes' :
138+ # converts MD5, SHA-1, SHA1, SHA-256, SHA256 to md5, sha1, sha256 used in IntelMQ
139+ hash_algo = comparison [0 ][1 ].lower ().replace ('-' , '' )
140+ indicators .append (('malware.hash.' + hash_algo , value ))
141+ else :
142+ if logger :
143+ logger .warning ('Unsupported Object Type "{}" in Pattern Expression. Pattern: {}' .format (key , pattern ))
144+
145+ return indicators
86146
87147
88148BOT = StixParserBot
0 commit comments