Skip to content

Commit a1db2b9

Browse files
committed
Add test to validate match-2-yar feature extraction
1 parent a49c562 commit a1db2b9

File tree

2 files changed

+127
-9
lines changed

2 files changed

+127
-9
lines changed

scripts/match-2-yar.py

Lines changed: 39 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -30,12 +30,14 @@
3030
"""
3131
import os
3232
import sys
33+
import json
3334
import logging
3435
import argparse
36+
import binascii
3537
import collections
3638
import multiprocessing
3739
import multiprocessing.pool
38-
from typing import Set, Dict, List
40+
from typing import Set, Dict, List, Union
3941
from pathlib import Path
4042
from datetime import date
4143

@@ -323,7 +325,7 @@ def get_sig_and_mask_for_dotnet_func(dnpe, body):
323325

324326
comment = ""
325327
sig = ""
326-
func_bytes = ""
328+
func_bytes = b""
327329
for insn in body.instructions:
328330
comment += (
329331
"{:04X}".format(insn.offset)
@@ -335,11 +337,11 @@ def get_sig_and_mask_for_dotnet_func(dnpe, body):
335337
)
336338

337339
sig += insn.get_opcode_bytes().hex()
338-
func_bytes += insn.get_opcode_bytes().hex()
340+
func_bytes += insn.get_opcode_bytes()
339341

340342
if insn.operand:
341343
sig += "??" * len(insn.get_operand_bytes())
342-
func_bytes += insn.get_operand_bytes().hex()
344+
func_bytes += insn.get_operand_bytes()
343345

344346
# Format the sig to be in the same style as the vivi portion (bytes separated by spaces)
345347
formatted_sig = ""
@@ -357,11 +359,25 @@ def get_sig_and_mask_for_dotnet_func(dnpe, body):
357359
class CodeFeature:
    """Basic object that will be used to create yara rules.

    Holds one extracted code feature: its masked signature, a human-readable
    comment, the raw bytes, and where it came from (file md5, address, scope).
    """

    def __init__(
        self,
        sig: str,
        comment: str,
        bytez: bytes,
        filemd5: str,
        # The tuple member is a quoted forward reference on purpose: a bare
        # ``tuple[int, int]`` is evaluated when the function is defined and
        # raises TypeError on Python < 3.9.
        addr: Union[int, "tuple[int, int]", None],
        scope: str,
    ):
        """Store the feature fields, normalizing the signature text.

        Args:
            sig: masked signature string; stored stripped and upper-cased.
            comment: free-form description of the feature.
            bytez: raw bytes of the feature.
            filemd5: md5 of the file the feature was extracted from.
            addr: address of the feature, stored as given.
            scope: scope of the feature (callers pass capa rule scope constants).
        """
        # Normalize once here so every consumer sees the same representation.
        self.sig = sig.strip().upper()
        self.comment = comment
        self.bytez = bytez
        self.addr = addr
        self.filemd5 = filemd5
        self.scope = scope

    def json(self):
        """Return the feature as a dict of JSON-serializable values.

        ``bytez`` is rendered as upper-case hex with one space between bytes;
        every other field is passed through unchanged.
        """
        return {
            "sig": self.sig,
            "comment": self.comment,
            "bytez": binascii.hexlify(self.bytez, " ", bytes_per_sep=1).decode("utf8").upper(),
            "addr": self.addr,
            "filemd5": self.filemd5,
            "scope": self.scope,
        }
365381

366382

367383
def get_code_features_for_capa_doc(doc: rd.ResultDocument, extractor):
@@ -411,7 +427,7 @@ def get_code_features_for_capa_doc(doc: rd.ResultDocument, extractor):
411427

412428
bytez = get_cb_bytes(file_vw, addr)
413429
sig = genSigAndMask(addr, bytez, doc.meta.analysis.arch)
414-
code_features.append(CodeFeature(sig, comment, bytez, filemd5))
430+
code_features.append(CodeFeature(sig, comment, bytez, filemd5, addr, capa.rules.BASIC_BLOCK_SCOPE))
415431

416432
for addr, rules in func_matches.items():
417433
comment = f"function at 0x{addr:08x}@{filemd5} with {len(rules)} features:\n"
@@ -421,7 +437,7 @@ def get_code_features_for_capa_doc(doc: rd.ResultDocument, extractor):
421437

422438
bytez = get_function_bytes(file_vw, addr)
423439
sig = genSigAndMask(addr, bytez, doc.meta.analysis.arch)
424-
code_features.append(CodeFeature(sig, comment, bytez, filemd5))
440+
code_features.append(CodeFeature(sig, comment, bytez, filemd5, addr, capa.rules.FUNCTION_SCOPE))
425441

426442
if len(code_features) == 0:
427443
logger.warning("No code features found for %s", filemd5)
@@ -479,7 +495,7 @@ def get_code_features_for_dotnet_doc(doc: rd.ResultDocument, extractor):
479495
func_comment, sig, bytez = get_sig_and_mask_for_dotnet_func(dnpe, f.inner)
480496
comment += func_comment
481497

482-
code_features.append(CodeFeature(sig, comment, bytez, filemd5))
498+
code_features.append(CodeFeature(sig, comment, bytez, filemd5, addr, capa.rules.FUNCTION_SCOPE))
483499

484500
if len(code_features) == 0:
485501
logger.warning("No code features found for %s", filemd5)
@@ -596,6 +612,7 @@ def multi_process_capa(argv=None):
596612
parser.add_argument("input", type=str, nargs="+", help="Path to directory or files to analyze")
597613
parser.add_argument("-n", "--parallelism", type=int, default=multiprocessing.cpu_count(), help="parallelism factor")
598614
parser.add_argument("--no-mp", action="store_true", help="disable subprocesses")
615+
parser.add_argument("--dump-features", action="store_true", help="output feature dictionary as json")
599616
args = parser.parse_args(args=argv)
600617
capa.main.handle_common_args(args)
601618

@@ -665,9 +682,22 @@ def map(f, args, parallelism=None):
665682

666683
logger.info("Done processing %s samples", len(samples))
667684

685+
if args.dump_features:
686+
dump_file_features(results)
687+
sys.exit(0)
688+
668689
return results
669690

670691

692+
# Output related functions
693+
694+
695+
def dump_file_features(result_dict: dict):
    """Print the extracted code features to stdout as indented JSON.

    ``result_dict`` maps a file md5 to an iterable of feature objects; each
    feature is converted with its ``json()`` method before serialization.
    """
    serializable = {}
    for md5, feature_list in result_dict.items():
        serializable[md5] = [feature.json() for feature in feature_list]
    print(json.dumps(serializable, indent=4))
699+
700+
671701
# YARA related functions
672702

673703
CODE_FEATURES_REFERENCED: List[CodeFeature] = []

tests/test_scripts.py

Lines changed: 88 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,14 +7,18 @@
77
# See the License for the specific language governing permissions and limitations under the License.
88

99
import sys
10+
import json
1011
import logging
1112
import textwrap
1213
import subprocess
1314
from pathlib import Path
1415
from datetime import date
16+
from functools import lru_cache
1517

1618
import pytest
1719

20+
import capa.rules
21+
1822
logger = logging.getLogger(__name__)
1923

2024
CD = Path(__file__).resolve().parent
@@ -153,6 +157,16 @@ def run_program(script_path, args):
153157
return subprocess.run(args, stdout=subprocess.PIPE)
154158

155159

160+
@lru_cache(maxsize=1)
def get_match_2_yar_features(path, is_dotnet):
    """Run match-2-yar.py with --dump-features on ``path`` and return its captured stdout.

    When ``is_dotnet`` is true, ``-f dotnet`` is appended to the arguments.
    The most recent result is memoized so consecutive test cases that use the
    same sample do not re-run the script.
    """
    dotnet_flags = ["-f", "dotnet"] if is_dotnet else []
    completed = run_program(get_script_path("match-2-yar.py"), ["--dump-features", path, *dotnet_flags])
    return completed.stdout
168+
169+
156170
def test_proto_conversion(tmp_path):
157171
t = tmp_path / "proto-test"
158172
t.mkdir()
@@ -283,3 +297,77 @@ def test_detect_duplicate_features(tmpdir):
283297
# Use importlib to import the script
284298
# Use fixtures vivisect to get a vivisect workspace for a given path
285299
# We can use known functions from the yara matches to extract out length, bytes, and masked sig
300+
@pytest.mark.parametrize(
301+
"path,is_dotnet,filemd5,addr,scope,expected_bytestring,expected_sig",
302+
[
303+
# Test match-2-yar x86 EXE - Basic Block Extraction
304+
pytest.param(
305+
get_data_path("9324d1a8ae37a36ae560c37448c9705a.exe_"),
306+
False,
307+
"9324d1a8ae37a36ae560c37448c9705a",
308+
0x004031A0,
309+
capa.rules.BASIC_BLOCK_SCOPE,
310+
"83 EC 10 B0 6C 8B 15 24 A0 40 00 88 44 24 01 88 44 24 02 B0 6F 8D 4C 24 00 88 44 24 04 88 44 24 0B 8B 44 24 14 C6 44 24 00 44 50 51 52 6A 00 C6 44 24 13 53 C6 44 24 15 72 C6 44 24 16 74 C6 44 24 17 57 C6 44 24 18 69 C6 44 24 19 6E C6 44 24 1A 64 C6 44 24 1C 77 C6 44 24 1D 00 E8 EF F7 FF FF A3 C4 A9 40 00 33 C0 83 C4 20 C2 04 00",
311+
"83 EC 10 B0 6C 8B 15 ?? ?? ?? ?? 88 44 24 ?? 88 44 24 ?? B0 6F 8D 4C 24 ?? 88 44 24 ?? 88 44 24 ?? 8B 44 24 ?? C6 44 24 ?? 44 50 51 52 6A 00 C6 44 24 ?? 53 C6 44 24 ?? 72 C6 44 24 ?? 74 C6 44 24 ?? 57 C6 44 24 ?? 69 C6 44 24 ?? 6E C6 44 24 ?? 64 C6 44 24 ?? 77 C6 44 24 ?? 00 E8 ?? ?? ?? ?? A3 ?? ?? ?? ?? 33 C0 83 C4 20 C2 04 00",
312+
),
313+
# Test match-2-yar x86 EXE - Function Extraction
314+
pytest.param(
315+
get_data_path("9324d1a8ae37a36ae560c37448c9705a.exe_"),
316+
False,
317+
"9324d1a8ae37a36ae560c37448c9705a",
318+
0x004019C0,
319+
capa.rules.FUNCTION_SCOPE,
320+
"81 EC 7C 04 00 00 53 55 8B 2D 14 92 40 00 56 8B F1 57 6A 00 8D 44 24 14 8B 8E A8 00 00 00 6A 04 B3 02 50 51 C7 44 24 28 03 00 00 00 C7 44 24 2C 00 00 00 00 C6 44 24 20 05 88 5C 24 21 C6 44 24 22 00 88 5C 24 23 FF D5 B9 96 00 00 00 33 C0 8D BC 24 34 02 00 00 8B 96 A8 00 00 00 F3 AB 8D 44 24 18 8D 4C 24 2C 50 6A 00 6A 00 51 6A 00 89 54 24 44 C7 44 24 40 01 00 00 00 FF 15 10 92 40 00 85 C0 7F 0C 8B 96 A8 00 00 00 52 E9 5D 02 00 00 8B 8E A8 00 00 00 6A 00 8D 84 24 38 02 00 00 68 58 02 00 00 50 51 FF 15 0C 92 40 00 80 BC 24 34 02 00 00 05 0F 85 2C 02 00 00 8A 84 24 35 02 00 00 84 C0 74 0A 3A C3 0F 85 19 02 00 00 EB 08 3A C3 0F 85 30 01 00 00 8B 0D D4 AA 40 00 68 A0 A5 40 00 E8 49 2C 00 00 85 C0 0F 86 18 01 00 00 8B 0D D4 AA 40 00 68 A0 A5 40 00 E8 31 2C 00 00 8B 0D D4 AA 40 00 68 A0 A6 40 00 8B D8 E8 1F 2C 00 00 89 44 24 14 B9 40 00 00 00 33 C0 8D BC 24 30 01 00 00 F3 AB 66 AB 8B 3D 94 90 40 00 8D 94 24 32 01 00 00 68 A0 A5 40 00 52 C6 84 24 38 01 00 00 05 88 9C 24 39 01 00 00 FF D7 8D 44 24 14 6A 04 8D 8C 1C 36 01 00 00 50 51 8B 0D D4 AA 40 00 E8 3B 2A 00 00 8D 94 1C 33 01 00 00 68 A0 A6 40 00 52 FF D7 8B 44 24 14 6A 00 8D 94 24 34 01 00 00 8D 4C 18 03 8B 86 A8 00 00 00 51 52 50 FF D5 8D 54 24 18 33 C0 B9 96 00 00 00 8D BC 24 34 02 00 00 52 50 F3 AB 8B 8E A8 00 00 00 50 8D 44 24 38 89 4C 24 3C 50 6A 00 C7 44 24 40 01 00 00 00 FF 15 10 92 40 00 85 C0 0F 8E 18 01 00 00 8B 86 A8 00 00 00 6A 00 8D 94 24 38 02 00 00 68 58 02 00 00 52 50 FF 15 0C 92 40 00 80 BC 24 34 02 00 00 05 0F 85 EE 00 00 00 8A 84 24 35 02 00 00 84 C0 0F 85 DF 00 00 00 8B 94 24 90 04 00 00 52 FF 15 FC 91 40 00 85 C0 0F 84 D6 00 00 00 C6 44 24 20 05 C6 44 24 21 01 C6 44 24 22 00 C6 44 24 23 01 8B 40 0C 8B 08 8B 84 24 94 04 00 00 50 8B 11 89 54 24 28 FF 15 08 92 40 00 8B 96 A8 00 00 00 6A 00 8D 4C 24 24 6A 0A 51 52 66 89 44 24 38 FF D5 B9 96 00 00 00 33 C0 8D BC 24 34 02 00 00 8D 54 24 2C F3 AB 8B 86 A8 00 00 00 8D 4C 24 18 51 6A 00 6A 00 52 6A 00 89 44 24 44 C7 44 24 40 01 00 00 00 FF 15 10 92 40 00 85 
C0 7F 09 8B 86 A8 00 00 00 50 EB 47 8B 96 A8 00 00 00 6A 00 8D 8C 24 38 02 00 00 68 58 02 00 00 51 52 FF 15 0C 92 40 00 80 BC 24 34 02 00 00 05 75 D1 8A 84 24 35 02 00 00 84 C0 75 C6 5F 5E 5D B0 01 5B 81 C4 7C 04 00 00 C2 08 00 8B 8E A8 00 00 00 51 FF 15 04 92 40 00 5F 5E 5D 32 C0 5B 81 C4 7C 04 00 00 C2 08 00",
321+
"81 EC 7C 04 00 00 53 55 8B 2D ?? ?? ?? ?? 56 8B F1 57 6A 00 8D 44 24 ?? 8B 8E ?? ?? ?? ?? 6A 04 B3 02 50 51 C7 44 24 ?? 03 00 00 00 C7 44 24 ?? 00 00 00 00 C6 44 24 ?? 05 88 5C 24 ?? C6 44 24 ?? 00 88 5C 24 ?? FF D5 B9 96 00 00 00 33 C0 8D BC 24 ?? ?? ?? ?? 8B 96 ?? ?? ?? ?? F3 AB 8D 44 24 ?? 8D 4C 24 ?? 50 6A 00 6A 00 51 6A 00 89 54 24 ?? C7 44 24 ?? 01 00 00 00 FF 15 ?? ?? ?? ?? 85 C0 7F ?? 8B 96 ?? ?? ?? ?? 52 E9 ?? ?? ?? ?? 8B 8E ?? ?? ?? ?? 6A 00 8D 84 24 ?? ?? ?? ?? 68 58 02 00 00 50 51 FF 15 ?? ?? ?? ?? 80 BC 24 ?? ?? ?? ?? 05 0F 85 ?? ?? ?? ?? 8A 84 24 ?? ?? ?? ?? 84 C0 74 ?? 3A C3 0F 85 ?? ?? ?? ?? EB ?? 3A C3 0F 85 ?? ?? ?? ?? 8B 0D ?? ?? ?? ?? 68 A0 A5 40 00 E8 ?? ?? ?? ?? 85 C0 0F 86 ?? ?? ?? ?? 8B 0D ?? ?? ?? ?? 68 A0 A5 40 00 E8 ?? ?? ?? ?? 8B 0D ?? ?? ?? ?? 68 A0 A6 40 00 8B D8 E8 ?? ?? ?? ?? 89 44 24 ?? B9 40 00 00 00 33 C0 8D BC 24 ?? ?? ?? ?? F3 AB 66 AB 8B 3D ?? ?? ?? ?? 8D 94 24 ?? ?? ?? ?? 68 A0 A5 40 00 52 C6 84 24 ?? ?? ?? ?? 05 88 9C 24 ?? ?? ?? ?? FF D7 8D 44 24 ?? 6A 04 8D 8C 1C ?? ?? ?? ?? 50 51 8B 0D ?? ?? ?? ?? E8 ?? ?? ?? ?? 8D 94 1C ?? ?? ?? ?? 68 A0 A6 40 00 52 FF D7 8B 44 24 ?? 6A 00 8D 94 24 ?? ?? ?? ?? 8D 4C 18 ?? 8B 86 ?? ?? ?? ?? 51 52 50 FF D5 8D 54 24 ?? 33 C0 B9 96 00 00 00 8D BC 24 ?? ?? ?? ?? 52 50 F3 AB 8B 8E ?? ?? ?? ?? 50 8D 44 24 ?? 89 4C 24 ?? 50 6A 00 C7 44 24 ?? 01 00 00 00 FF 15 ?? ?? ?? ?? 85 C0 0F 8E ?? ?? ?? ?? 8B 86 ?? ?? ?? ?? 6A 00 8D 94 24 ?? ?? ?? ?? 68 58 02 00 00 52 50 FF 15 ?? ?? ?? ?? 80 BC 24 ?? ?? ?? ?? 05 0F 85 ?? ?? ?? ?? 8A 84 24 ?? ?? ?? ?? 84 C0 0F 85 ?? ?? ?? ?? 8B 94 24 ?? ?? ?? ?? 52 FF 15 ?? ?? ?? ?? 85 C0 0F 84 ?? ?? ?? ?? C6 44 24 ?? 05 C6 44 24 ?? 01 C6 44 24 ?? 00 C6 44 24 ?? 01 8B 40 ?? 8B 08 8B 84 24 ?? ?? ?? ?? 50 8B 11 89 54 24 ?? FF 15 ?? ?? ?? ?? 8B 96 ?? ?? ?? ?? 6A 00 8D 4C 24 ?? 6A 0A 51 52 66 89 44 24 ?? FF D5 B9 96 00 00 00 33 C0 8D BC 24 ?? ?? ?? ?? 8D 54 24 ?? F3 AB 8B 86 ?? ?? ?? ?? 8D 4C 24 ?? 51 6A 00 6A 00 52 6A 00 89 44 24 ?? C7 44 24 ?? 01 00 00 00 FF 15 ?? ?? ?? ?? 
85 C0 7F ?? 8B 86 ?? ?? ?? ?? 50 EB ?? 8B 96 ?? ?? ?? ?? 6A 00 8D 8C 24 ?? ?? ?? ?? 68 58 02 00 00 51 52 FF 15 ?? ?? ?? ?? 80 BC 24 ?? ?? ?? ?? 05 75 ?? 8A 84 24 ?? ?? ?? ?? 84 C0 75 ?? 5F 5E 5D B0 01 5B 81 C4 7C 04 00 00 C2 08 00 8B 8E ?? ?? ?? ?? 51 FF 15 ?? ?? ?? ?? 5F 5E 5D 32 C0 5B 81 C4 7C 04 00 00 C2 08 00",
322+
),
323+
# Test match-2-yar x64 EXE - Basic Block Extraction
324+
pytest.param(
325+
get_data_path("c2bb17c12975ea61ff43a71afd9c3ff111d018af161859abae0bdb0b3dae98f9.exe_"),
326+
False,
327+
"50580ef0b882905316c4569162ea07d9",
328+
0x14000109F,
329+
capa.rules.BASIC_BLOCK_SCOPE,
330+
"33 C9 BA 1F 03 00 00 41 B8 00 10 00 00 44 8D 49 40 FF 15 4A 0F 00 00 41 B8 1F 03 00 00 48 8B D7 48 8B C8 48 8B D8 E8 65 0D 00 00 48 8D 0D 7F 11 00 00 C7 44 24 20 20 00 00 00 C7 44 24 24 01 00 00 00 48 C7 44 24 28 00 00 00 00 48 89 5C 24 30 48 C7 44 24 38 00 00 00 00 FF 15 0A 0F 00 00 4C 8D 44 24 20 48 8D 15 46 11 00 00 48 8D 0D 77 11 00 00 FF 15 F9 0E 00 00 33 C0 48 8B 4C 24 40 48 33 CC E8 2A 00 00 00 48 8B 5C 24 60 48 83 C4 50 5F C3",
331+
"33 C9 BA 1F 03 00 00 41 B8 00 10 00 00 44 8D 49 ?? FF 15 ?? ?? ?? ?? 41 B8 1F 03 00 00 48 8B D7 48 8B C8 48 8B D8 E8 ?? ?? ?? ?? 48 8D 0D ?? ?? ?? ?? C7 44 24 ?? 20 00 00 00 C7 44 24 ?? 01 00 00 00 48 C7 44 24 ?? 00 00 00 00 48 89 5C 24 ?? 48 C7 44 24 ?? 00 00 00 00 FF 15 ?? ?? ?? ?? 4C 8D 44 24 ?? 48 8D 15 ?? ?? ?? ?? 48 8D 0D ?? ?? ?? ?? FF 15 ?? ?? ?? ?? 33 C0 48 8B 4C 24 ?? 48 33 CC E8 ?? ?? ?? ?? 48 8B 5C 24 ?? 48 83 C4 50 5F C3",
332+
),
333+
# Test match-2-yar x64 EXE - Function Extraction
334+
pytest.param(
335+
get_data_path("c2bb17c12975ea61ff43a71afd9c3ff111d018af161859abae0bdb0b3dae98f9.exe_"),
336+
False,
337+
"50580ef0b882905316c4569162ea07d9",
338+
0x140001010,
339+
capa.rules.FUNCTION_SCOPE,
340+
"48 89 5C 24 08 57 48 83 EC 50 48 8B 05 DF 1F 00 00 48 33 C4 48 89 44 24 40 66 0F 6F 15 8F 12 00 00 48 8D 3D 08 20 00 00 33 C9 B8 00 03 00 00 90 F3 0F 6F 04 39 66 0F EF C2 F3 0F 7F 04 39 F3 0F 6F 4C 39 10 66 0F EF CA F3 0F 7F 4C 39 10 F3 0F 6F 44 39 20 66 0F EF C2 F3 0F 7F 44 39 20 F3 0F 6F 44 39 30 66 0F EF C2 F3 0F 7F 44 39 30 48 83 C1 40 48 3B C8 7C B9 66 0F 1F 84 00 00 00 00 00 80 34 38 62 48 FF C0 48 3D 1F 03 00 00 7C F1 33 C9 BA 1F 03 00 00 41 B8 00 10 00 00 44 8D 49 40 FF 15 4A 0F 00 00 41 B8 1F 03 00 00 48 8B D7 48 8B C8 48 8B D8 E8 65 0D 00 00 48 8D 0D 7F 11 00 00 C7 44 24 20 20 00 00 00 C7 44 24 24 01 00 00 00 48 C7 44 24 28 00 00 00 00 48 89 5C 24 30 48 C7 44 24 38 00 00 00 00 FF 15 0A 0F 00 00 4C 8D 44 24 20 48 8D 15 46 11 00 00 48 8D 0D 77 11 00 00 FF 15 F9 0E 00 00 33 C0 48 8B 4C 24 40 48 33 CC E8 2A 00 00 00 48 8B 5C 24 60 48 83 C4 50 5F C3",
341+
"48 89 5C 24 ?? 57 48 83 EC 50 48 8B 05 ?? ?? ?? ?? 48 33 C4 48 89 44 24 ?? 66 0F 6F 15 ?? ?? 00 00 48 8D 3D ?? ?? ?? ?? 33 C9 B8 00 03 00 00 90 F3 0F 6F 04 39 66 0F EF C2 F3 0F 7F 04 39 F3 0F 6F 4C 39 ?? 66 0F EF CA F3 0F 7F 4C 39 ?? F3 0F 6F 44 39 ?? 66 0F EF C2 F3 0F 7F 44 39 ?? F3 0F 6F 44 39 ?? 66 0F EF C2 F3 0F 7F 44 39 ?? 48 83 C1 40 48 3B C8 7C ?? 66 0F 1F 84 00 ?? ?? 00 00 80 34 38 62 48 FF C0 48 3D 1F 03 00 00 7C ?? 33 C9 BA 1F 03 00 00 41 B8 00 10 00 00 44 8D 49 ?? FF 15 ?? ?? ?? ?? 41 B8 1F 03 00 00 48 8B D7 48 8B C8 48 8B D8 E8 ?? ?? ?? ?? 48 8D 0D ?? ?? ?? ?? C7 44 24 ?? 20 00 00 00 C7 44 24 ?? 01 00 00 00 48 C7 44 24 ?? 00 00 00 00 48 89 5C 24 ?? 48 C7 44 24 ?? 00 00 00 00 FF 15 ?? ?? ?? ?? 4C 8D 44 24 ?? 48 8D 15 ?? ?? ?? ?? 48 8D 0D ?? ?? ?? ?? FF 15 ?? ?? ?? ?? 33 C0 48 8B 4C 24 ?? 48 33 CC E8 ?? ?? ?? ?? 48 8B 5C 24 ?? 48 83 C4 50 5F C3",
342+
),
343+
# Test match-2-yar .NET EXE - Function Extraction
344+
pytest.param(
345+
get_data_path("dotnet/1c444ebeba24dcba8628b7dfe5fec7c6.exe_"),
346+
True,
347+
"1c444ebeba24dcba8628b7dfe5fec7c6",
348+
0x06000073,
349+
capa.rules.FUNCTION_SCOPE,
350+
"03 28 7D 00 00 06 0A 12 01 FE 15 0A 00 00 02 03 12 01 28 7F 00 00 06 26 12 01 7B 7B 00 00 04 12 01 7B 79 00 00 04 59 0C 12 01 7B 7C 00 00 04 12 01 7B 7A 00 00 04 59 0D 06 28 77 00 00 06 13 04 06 08 09 28 76 00 00 06 13 05 11 04 11 05 28 7A 00 00 06 13 06 11 04 16 16 08 09 06 16 16 20 20 00 CC 00 28 75 00 00 06 26 11 04 11 06 28 7A 00 00 06 26 11 04 28 78 00 00 06 26 03 06 28 7E 00 00 06 26 11 05 28 65 00 00 0A 13 07 11 05 28 79 00 00 06 26 11 07 2A",
351+
"03 28 ?? ?? ?? ?? 0A 12 ?? FE 15 ?? ?? ?? ?? 03 12 ?? 28 ?? ?? ?? ?? 26 12 ?? 7B ?? ?? ?? ?? 12 ?? 7B ?? ?? ?? ?? 59 0C 12 ?? 7B ?? ?? ?? ?? 12 ?? 7B ?? ?? ?? ?? 59 0D 06 28 ?? ?? ?? ?? 13 ?? 06 08 09 28 ?? ?? ?? ?? 13 ?? 11 ?? 11 ?? 28 ?? ?? ?? ?? 13 ?? 11 ?? 16 16 08 09 06 16 16 20 ?? ?? ?? ?? 28 ?? ?? ?? ?? 26 11 ?? 11 ?? 28 ?? ?? ?? ?? 26 11 ?? 28 ?? ?? ?? ?? 26 03 06 28 ?? ?? ?? ?? 26 11 ?? 28 ?? ?? ?? ?? 13 ?? 11 ?? 28 ?? ?? ?? ?? 26 11 ?? 2A",
352+
),
353+
],
354+
)
355+
def test_match2yar_feature_extraction(path, is_dotnet, filemd5, addr, scope, expected_bytestring, expected_sig):
    """Check the bytes and masked signature that match-2-yar dumps for one known feature."""
    raw_stdout = get_match_2_yar_features(path, is_dotnet)
    dumped = json.loads(raw_stdout.decode("utf8"))

    # The dump is keyed by the md5 of the analyzed file; narrow it down to the
    # feature at the expected address and scope.
    candidates = []
    for feature in dumped[filemd5]:
        if feature["addr"] == addr and feature["scope"] == scope:
            candidates.append(feature)

    # Exactly one feature is expected for an (addr, scope) pair.
    assert len(candidates) == 1

    # Verify both the raw extraction and the masking.
    assert candidates[0]["bytez"] == expected_bytestring
    assert candidates[0]["sig"] == expected_sig

0 commit comments

Comments
 (0)