1111import codecs
1212import logging
1313import collections
14- from typing import TYPE_CHECKING , Set , Dict , List , Union
14+ from typing import TYPE_CHECKING , Set , Dict , List , Union , Optional , Sequence
1515
1616if TYPE_CHECKING :
1717 # circular import, otherwise
2020import capa .perf
2121import capa .features
2222import capa .features .extractors .elf
23+ from capa .features .address import Address
2324
2425logger = logging .getLogger (__name__ )
2526MAX_BYTES_FEATURE_SIZE = 0x100
@@ -70,20 +71,13 @@ def __init__(
7071 success : bool ,
7172 statement : Union ["capa.engine.Statement" , "Feature" ],
7273 children : List ["Result" ],
73- locations = None ,
74+ locations : Optional [ Set [ Address ]] = None ,
7475 ):
75- """
76- args:
77- success (bool)
78- statement (capa.engine.Statement or capa.features.Feature)
79- children (list[Result])
80- locations (iterable[VA])
81- """
8276 super (Result , self ).__init__ ()
8377 self .success = success
8478 self .statement = statement
8579 self .children = children
86- self .locations = locations if locations is not None else ()
80+ self .locations = locations if locations is not None else set ()
8781
8882 def __eq__ (self , other ):
8983 if isinstance (other , bool ):
@@ -98,7 +92,7 @@ def __nonzero__(self):
9892
9993
10094class Feature (abc .ABC ):
101- def __init__ (self , value : Union [str , int , bytes ], description = None ):
95+ def __init__ (self , value : Union [str , int , float , bytes ], description = None ):
10296 """
10397 Args:
10498 value (any): the value of the feature, such as the number or string.
@@ -116,6 +110,15 @@ def __hash__(self):
116110 def __eq__ (self , other ):
117111 return self .name == other .name and self .value == other .value
118112
113+ def __lt__ (self , other ):
114+ # TODO: this is a huge hack!
115+ import capa .features .freeze .features
116+
117+ return (
118+ capa .features .freeze .features .feature_from_capa (self ).json ()
119+ < capa .features .freeze .features .feature_from_capa (other ).json ()
120+ )
121+
119122 def get_value_str (self ) -> str :
120123 """
121124 render the value of this feature, for use by `__str__` and friends.
@@ -137,27 +140,10 @@ def __str__(self):
137140 def __repr__ (self ):
138141 return str (self )
139142
140- def evaluate (self , ctx : Dict ["Feature" , Set [int ]], ** kwargs ) -> Result :
143+ def evaluate (self , ctx : Dict ["Feature" , Set [Address ]], ** kwargs ) -> Result :
141144 capa .perf .counters ["evaluate.feature" ] += 1
142145 capa .perf .counters ["evaluate.feature." + self .name ] += 1
143- return Result (self in ctx , self , [], locations = ctx .get (self , []))
144-
145- def freeze_serialize (self ):
146- return (self .__class__ .__name__ , [self .value ])
147-
148- @classmethod
149- def freeze_deserialize (cls , args ):
150- # as you can see below in code,
151- # if the last argument is a dictionary,
152- # consider it to be kwargs passed to the feature constructor.
153- if len (args ) == 1 :
154- return cls (* args )
155- elif isinstance (args [- 1 ], dict ):
156- kwargs = args [- 1 ]
157- args = args [:- 1 ]
158- return cls (* args , ** kwargs )
159- else :
160- return cls (* args )
146+ return Result (self in ctx , self , [], locations = ctx .get (self , set ()))
161147
162148
163149class MatchedRule (Feature ):
@@ -230,7 +216,7 @@ def evaluate(self, ctx, short_circuit=True):
230216 # instead, return a new instance that has a reference to both the substring and the matched values.
231217 return Result (True , _MatchedSubstring (self , matches ), [], locations = locations )
232218 else :
233- return Result (False , _MatchedSubstring (self , None ), [])
219+ return Result (False , _MatchedSubstring (self , {} ), [])
234220
235221 def __str__ (self ):
236222 return "substring(%s)" % self .value
@@ -244,11 +230,11 @@ class _MatchedSubstring(Substring):
244230 note: this type should only ever be constructed by `Substring.evaluate()`. it is not part of the public API.
245231 """
246232
247- def __init__ (self , substring : Substring , matches ):
233+ def __init__ (self , substring : Substring , matches : Dict [ str , Set [ Address ]] ):
248234 """
249235 args:
250- substring (Substring) : the substring feature that matches.
251- match (Dict[string, List[int]]|None) : mapping from matching string to its locations.
236+ substring: the substring feature that matches.
237+ matches: mapping from matching string to its locations.
252238 """
253239 super (_MatchedSubstring , self ).__init__ (str (substring .value ), description = substring .description )
254240 # we want this to collide with the name of `Substring` above,
@@ -327,7 +313,7 @@ def evaluate(self, ctx, short_circuit=True):
327313 # see #262.
328314 return Result (True , _MatchedRegex (self , matches ), [], locations = locations )
329315 else :
330- return Result (False , _MatchedRegex (self , None ), [])
316+ return Result (False , _MatchedRegex (self , {} ), [])
331317
332318 def __str__ (self ):
333319 return "regex(string =~ %s)" % self .value
@@ -341,11 +327,11 @@ class _MatchedRegex(Regex):
341327 note: this type should only ever be constructed by `Regex.evaluate()`. it is not part of the public API.
342328 """
343329
344- def __init__ (self , regex : Regex , matches ):
330+ def __init__ (self , regex : Regex , matches : Dict [ str , Set [ Address ]] ):
345331 """
346332 args:
347- regex (Regex) : the regex feature that matches.
348- match (Dict[string, List[int]]|None) : mapping from matching string to its locations.
333+ regex: the regex feature that matches.
334+ matches : mapping from matching string to its locations.
349335 """
350336 super (_MatchedRegex , self ).__init__ (str (regex .value ), description = regex .description )
351337 # we want this to collide with the name of `Regex` above,
@@ -389,13 +375,6 @@ def evaluate(self, ctx, **kwargs):
389375 def get_value_str (self ):
390376 return hex_string (bytes_to_str (self .value ))
391377
392- def freeze_serialize (self ):
393- return (self .__class__ .__name__ , [bytes_to_str (self .value ).upper ()])
394-
395- @classmethod
396- def freeze_deserialize (cls , args ):
397- return cls (* [codecs .decode (x , "hex" ) for x in args ])
398-
399378
400379# other candidates here: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types
401380ARCH_I386 = "i386"
0 commit comments