Skip to content

Commit ffbae22

Browse files
spoiicyAkshit Maheshwarymlodic
authored
Floss Capa Refactor (#2933)
* added yarax analyzer * dumped migration and minor changes to analyzer * tracking rules latest version and update analyzer config * created new mixin for helper methods for managing rules * refactored yarax analyzer to helper methods from RulesUtilityMixin * refactored floss analyzer to utilize pip package * refactored capa_info to utilize pip package * added timeout parameter and changed soft time limit * removed docker related sections and updated project-requirements * fixed deepsource issues * fixed deepsource issues * fixed wget command * shell-escaped user input and switched to requests lib to handle downloading of signatures * updated migration numbers * tracking rules_version and minor changes * fixed deepsource issue and updated migration numbers * fixed capa and floss tests * updated migration file and refactored capa_info code to use helper methods from RulesUtiliyMixin * minor changes to log version numbers in mixins * added migration to disable guarddog analyzers --------- Co-authored-by: Akshit Maheshwary <[email protected]> Co-authored-by: Matteo Lodi <[email protected]>
1 parent eb813e3 commit ffbae22

File tree

11 files changed

+344
-102
lines changed

11 files changed

+344
-102
lines changed
Lines changed: 154 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,165 @@
11
# This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
22
# See the file 'LICENSE' for copying permission.
3-
from typing import Dict
43

5-
from api_app.analyzers_manager.classes import DockerBasedAnalyzer, FileAnalyzer
4+
import json
5+
import logging
6+
import os
7+
import shutil
8+
import subprocess
9+
from pathlib import Path
10+
from shlex import quote
611

12+
import requests
13+
from django.conf import settings
714

8-
class CapaInfo(FileAnalyzer, DockerBasedAnalyzer):
9-
name: str = "Capa"
10-
url: str = "http://malware_tools_analyzers:4002/capa"
11-
# interval between http request polling
12-
poll_distance: int = 10
13-
# http request polling max number of tries
14-
max_tries: int = 60
15-
# here, max_tries * poll_distance = 10 minutes
16-
timeout: int = 60 * 9
17-
# whereas subprocess timeout is kept as 60 * 9 = 9 minutes
15+
from api_app.analyzers_manager.classes import FileAnalyzer
16+
from api_app.analyzers_manager.exceptions import AnalyzerRunException
17+
from api_app.analyzers_manager.models import PythonModule
18+
from api_app.mixins import RulesUtiliyMixin
1819

20+
logger = logging.getLogger(__name__)
21+
22+
BASE_LOCATION = f"{settings.MEDIA_ROOT}/capa"
23+
RULES_LOCATION = f"{BASE_LOCATION}/capa-rules"
24+
SIGNATURE_LOCATION = f"{BASE_LOCATION}/sigs"
25+
RULES_FILE = f"{RULES_LOCATION}/capa_rules.zip"
26+
RULES_URL = "https://github.com/mandiant/capa-rules/archive/refs/tags/"
27+
28+
29+
class CapaInfo(FileAnalyzer, RulesUtiliyMixin):
    """Run the flare-capa capability detector against the submitted file.

    Keeps the local capa-rules ruleset and signature files up to date
    (downloading them from GitHub when needed), then invokes the ``capa``
    binary via subprocess and returns its JSON report enriched with the
    command executed and the rules version used.
    """

    # Treat the sample as shellcode (adds "-f sc32/sc64" to the command).
    shellcode: bool
    # Architecture selector for shellcode mode; anything but "64" means 32-bit.
    arch: str
    # Seconds to wait for the capa subprocess before aborting the analysis.
    timeout: float = 15
    # When True, re-download signature files even if they already exist.
    force_pull_signatures: bool = False

    # Timeout (seconds) for every outbound HTTP request; without it a stalled
    # GitHub API call would hang the worker indefinitely.
    _HTTP_TIMEOUT: float = 30

    @classmethod
    def _download_signatures(cls) -> None:
        """Replace the local signature directory with a fresh copy from GitHub.

        Raises:
            AnalyzerRunException: if the listing or any signature file cannot
                be retrieved.
        """
        logger.info(f"Downloading signatures at {SIGNATURE_LOCATION} now")

        if os.path.exists(SIGNATURE_LOCATION):
            logger.info(f"Removing existing signatures at {SIGNATURE_LOCATION}")
            shutil.rmtree(SIGNATURE_LOCATION)

        os.makedirs(SIGNATURE_LOCATION)
        logger.info(f"Created fresh signatures directory at {SIGNATURE_LOCATION}")

        signatures_url = "https://api.github.com/repos/mandiant/capa/contents/sigs"
        try:
            response = requests.get(signatures_url, timeout=cls._HTTP_TIMEOUT)
            # fail fast on rate-limiting / 4xx instead of iterating an error
            # payload as if it were the file listing
            response.raise_for_status()
            signatures_list = response.json()

            for signature in signatures_list:
                filename = signature["name"]
                download_url = signature["download_url"]

                signature_file_path = os.path.join(SIGNATURE_LOCATION, filename)

                sig_content = requests.get(
                    download_url, stream=True, timeout=cls._HTTP_TIMEOUT
                )
                sig_content.raise_for_status()
                with open(signature_file_path, mode="wb") as file:
                    for chunk in sig_content.iter_content(chunk_size=10 * 1024):
                        file.write(chunk)

        except Exception as e:
            logger.error(f"Failed to download signature: {e}")
            # chain the original error so the real cause is preserved
            raise AnalyzerRunException("Failed to update signatures") from e
        logger.info("Successfully updated signatures")

    @classmethod
    def update(cls, anayzer_module: PythonModule) -> bool:
        """Fetch and unpack the latest capa-rules release.

        Returns True on success, False on any failure (the caller decides
        whether running with stale rules is acceptable).
        """
        try:
            logger.info("Updating capa rules")
            response = requests.get(
                "https://api.github.com/repos/mandiant/capa-rules/releases/latest",
                timeout=cls._HTTP_TIMEOUT,
            )
            response.raise_for_status()
            latest_version = response.json()["tag_name"]
            capa_rules_download_url = RULES_URL + latest_version + ".zip"

            # helpers provided by RulesUtiliyMixin
            cls._download_rules(
                rule_set_download_url=capa_rules_download_url,
                rule_set_directory=RULES_LOCATION,
                rule_file_path=RULES_FILE,
                latest_version=latest_version,
                analyzer_module=anayzer_module,
            )

            cls._unzip(Path(RULES_FILE))

            logger.info("Successfully updated capa rules")

            return True

        except Exception as e:
            logger.error(f"Failed to update capa rules with error: {e}")

            return False

    def run(self):
        """Analyze ``self.filepath`` with capa and return the parsed JSON report.

        Raises:
            AnalyzerRunException: if rules cannot be updated, capa exits
                non-zero, or capa exceeds ``self.timeout`` seconds.
        """
        try:
            response = requests.get(
                "https://api.github.com/repos/mandiant/capa-rules/releases/latest",
                timeout=self._HTTP_TIMEOUT,
            )
            response.raise_for_status()
            latest_version = response.json()["tag_name"]

            capa_analyzer_module = self.python_module

            # skip the rules download entirely when we are already current
            update_status = (
                True
                if self._check_if_latest_version(latest_version, capa_analyzer_module)
                else self.update(capa_analyzer_module)
            )

            if self.force_pull_signatures or not os.path.isdir(SIGNATURE_LOCATION):
                self._download_signatures()

            # stale rules on disk are tolerated; no rules at all are not
            if not (os.path.isdir(RULES_LOCATION)) and not update_status:
                raise AnalyzerRunException("Couldn't update capa rules")

            command: list[str] = ["/usr/local/bin/capa", "--quiet", "--json"]
            shell_code_arch = "sc64" if self.arch == "64" else "sc32"
            if self.shellcode:
                command.append("-f")
                command.append(shell_code_arch)

            # Setting default capa-rules path
            command.append("-r")
            command.append(RULES_LOCATION)

            # Setting default signatures location
            command.append("-s")
            command.append(SIGNATURE_LOCATION)

            # BUGFIX: args are passed as a list with shell=False, so no shell
            # quoting must be applied; shlex.quote() here injected literal
            # quote characters into the argv element, breaking any filepath
            # containing spaces or shell metacharacters.
            command.append(self.filepath)

            logger.info(
                f"Starting CAPA analysis for {self.filename} with hash: {self.md5} and command: {command}"
            )

            process: subprocess.CompletedProcess = subprocess.run(
                command,
                capture_output=True,
                text=True,
                timeout=self.timeout,
                check=True,
            )

            result = json.loads(process.stdout)
            result["command_executed"] = command
            result["rules_version"] = latest_version

            logger.info(
                f"CAPA analysis successfully completed for file: {self.filename} with hash {self.md5}"
            )

        except subprocess.TimeoutExpired as e:
            # previously unhandled: a slow sample bubbled up as a raw
            # TimeoutExpired instead of a proper analyzer error
            logger.info(
                f"Capa Info timed out for {self.filename} with hash: {self.md5}"
            )
            raise AnalyzerRunException(
                f" Analyzer for {self.filename} with hash: {self.md5}"
                f" timed out after {self.timeout} seconds"
            ) from e
        except subprocess.CalledProcessError as e:
            stderr = e.stderr
            logger.info(
                f"Capa Info failed to run for {self.filename} with hash: {self.md5} with command {e}"
            )
            raise AnalyzerRunException(
                f" Analyzer for {self.filename} with hash: {self.md5} failed with error: {stderr}"
            ) from e

        return result

api_app/analyzers_manager/file_analyzers/floss.py

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,20 @@
11
# This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
22
# See the file 'LICENSE' for copying permission.
33

4-
from json import dumps as json_dumps
4+
import logging
5+
import subprocess
6+
from json import dumps, loads
7+
from shlex import quote
58

69
from api_app.analyzers_manager.classes import DockerBasedAnalyzer, FileAnalyzer
710
from api_app.analyzers_manager.exceptions import AnalyzerRunException
811

12+
logger = logging.getLogger(__name__)
13+
914

1015
class Floss(FileAnalyzer, DockerBasedAnalyzer):
1116
name: str = "Floss"
12-
url: str = "http://malware_tools_analyzers:4002/floss"
13-
ranking_url: str = "http://malware_tools_analyzers:4002/stringsifter"
17+
url: str = "http://malware_tools_analyzers:4002/stringsifter"
1418
# interval between http request polling
1519
poll_distance: int = 10
1620
# http request polling max number of tries
@@ -29,30 +33,39 @@ def update(cls) -> bool:
2933
pass
3034

3135
def run(self):
32-
# get binary
33-
binary = self.read_file_bytes()
34-
# make request data
35-
fname = str(self.filename).replace("/", "_").replace(" ", "_")
3636
# From floss v3 there is prompt that can be overcome
3737
# by using the flag --no static.
3838
# We can lose static strings considering that we can easily
3939
# retrieve them with more simple tools
40-
args = [f"@{fname}", "--json", "--no", "static"]
41-
req_data = {"args": args, "timeout": self.timeout}
42-
req_files = {fname: binary}
43-
result = self._docker_run(req_data, req_files)
44-
if not isinstance(result, dict):
40+
try:
41+
process: subprocess.CompletedProcess = subprocess.run(
42+
[
43+
"/usr/local/bin/floss",
44+
"--json",
45+
"--no",
46+
"static",
47+
"--",
48+
quote(self.filepath),
49+
],
50+
capture_output=True,
51+
text=True,
52+
check=True,
53+
)
54+
55+
result = loads(process.stdout)
56+
57+
except subprocess.CalledProcessError as e:
58+
stderr = e.stderr
59+
logger.info(f"Floss failed to run for {self.filename} with command {e}")
4560
raise AnalyzerRunException(
46-
f"result from floss tool is not a dict but is {type(result)}."
47-
f" Full dump: {result}"
61+
f" Analyzer for {self.filename} failed with error: {stderr}"
4862
)
63+
4964
result["exceeded_max_number_of_strings"] = {}
50-
# we are changing the endpoint of _docker_run to stringsifter
51-
self.url = self.ranking_url
5265

5366
for key in self.max_no_of_strings:
5467
if self.rank_strings[key]:
55-
strings = json_dumps(result["strings"][key])
68+
strings = dumps(result["strings"][key])
5669
# 4 is the number of arguments that we are already passing
5770
analyzable_strings = strings[: self.OS_MAX_ARGS - 5]
5871
args = [
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
# Generated by Django 4.2.17 on 2025-07-24 14:57

from django.db import migrations


def migrate(apps, schema_editor):
    """Switch the Capa analyzer from docker-based to local execution.

    - attaches a daily (midnight UTC) rules-update schedule to the
      CapaInfo python module
    - raises the soft time limit and clears the docker flag on its configs
    - adds the ``timeout`` and ``force_pull_signatures`` parameters and a
      default plugin config (15s) for ``timeout``
    """
    PythonModule = apps.get_model("api_app", "PythonModule")
    Parameter = apps.get_model("api_app", "Parameter")
    PluginConfig = apps.get_model("api_app", "PluginConfig")
    CrontabSchedule = apps.get_model("django_celery_beat", "CrontabSchedule")
    AnalyzerConfig = apps.get_model("analyzers_manager", "AnalyzerConfig")

    pm = PythonModule.objects.get(
        module="capa_info.CapaInfo",
        base_path="api_app.analyzers_manager.file_analyzers",
    )

    new_crontab, _created = CrontabSchedule.objects.get_or_create(
        minute="0",
        hour="0",
        day_of_week="*",
        day_of_month="*",
        month_of_year="*",
        timezone="UTC",
    )
    # BUGFIX: attach the schedule whether or not the crontab row already
    # existed. Previously this was guarded by ``if created``, so when the
    # (common, shared) midnight schedule already existed, the module was
    # left without an update schedule and rules never auto-updated.
    pm.update_schedule = new_crontab
    pm.full_clean()
    pm.save()

    # single UPDATE instead of two round-trips over the same queryset
    AnalyzerConfig.objects.filter(python_module=pm).update(
        soft_time_limit=1800, docker_based=False
    )

    p1 = Parameter(
        name="timeout",
        type="float",
        description="Duration in seconds for which intelowl waits for capa to return results. Default is set to 15 seconds.",
        is_secret=False,
        required=False,
        python_module=pm,
    )

    p2 = Parameter(
        name="force_pull_signatures",
        type="bool",
        description="Force download signatures from flare-capa github repository",
        is_secret=False,
        required=False,
        python_module=pm,
    )

    p1.full_clean()
    p1.save()

    p2.full_clean()
    p2.save()

    analyzer_configs = AnalyzerConfig.objects.filter(python_module=pm)

    plugin_config_to_create = [
        PluginConfig(analyzer_config=config, parameter=p1, value=15)
        for config in analyzer_configs
    ]

    PluginConfig.objects.bulk_create(plugin_config_to_create)


class Migration(migrations.Migration):

    dependencies = [
        ("analyzers_manager", "0167_analyzerrulesfileversion"),
    ]

    # reverse is a no-op: parameters and schedule are left in place on rollback
    operations = [migrations.RunPython(migrate, migrations.RunPython.noop)]
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
# Generated by Django 4.2.25 on 2025-11-03 10:37

from django.db import migrations


def migrate(apps, schema_editor):
    """Disable both GuardDog analyzers (file and observable variants)."""
    PythonModule = apps.get_model("api_app", "PythonModule")
    AnalyzerConfig = apps.get_model("analyzers_manager", "AnalyzerConfig")

    # (module path, base package) for each GuardDog analyzer to disable
    targets = (
        (
            "guarddog_file.GuardDogFile",
            "api_app.analyzers_manager.file_analyzers",
        ),
        (
            "guarddog_generic.GuardDogGeneric",
            "api_app.analyzers_manager.observable_analyzers",
        ),
    )

    # resolve all modules first so a missing one fails before any update
    modules = [
        PythonModule.objects.get(module=module_path, base_path=base_path)
        for module_path, base_path in targets
    ]

    for python_module in modules:
        AnalyzerConfig.objects.filter(python_module=python_module).update(
            disabled=True
        )


class Migration(migrations.Migration):

    dependencies = [
        ("analyzers_manager", "0168_update_capa"),
    ]

    operations = [migrations.RunPython(migrate, migrations.RunPython.noop)]

0 commit comments

Comments
 (0)