Skip to content

Commit eb813e3

Browse files
spoiicy and Akshit Maheshwary authored
YaraX Analyzer with Yara-Forge Rule Repository integration (#2980)
* added yarax analyzer
* dumped migration and minor changes to analyzer
* tracking rules latest version and update analyzer config
* created new mixin for helper methods for managing rules
* refactored yarax analyzer to helper methods from RulesUtilityMixin

---------

Co-authored-by: Akshit Maheshwary <[email protected]>
1 parent 63589e5 commit eb813e3

File tree

6 files changed

+469
-1
lines changed

6 files changed

+469
-1
lines changed
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
import logging
2+
import os
3+
import pathlib
4+
5+
import requests
6+
import yara_x
7+
from django.conf import settings
8+
9+
from api_app.analyzers_manager.classes import FileAnalyzer
10+
from api_app.analyzers_manager.exceptions import AnalyzerRunException
11+
from api_app.mixins import RulesUtiliyMixin
12+
13+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
14+
15+
16+
# GitHub API endpoint for the latest YARA-Forge release; the code below reads
# its "tag_name" and "assets" entries.
RULES_URL = "https://api.github.com/repos/YARAHQ/yara-forge/releases/latest"
17+
# Directory under MEDIA_ROOT holding the rule packs, one sub-directory per rule set.
BASE_RULES_LOCATION = f"{settings.MEDIA_ROOT}/yarax"
18+
19+
20+
class YaraX(FileAnalyzer, RulesUtiliyMixin):
    """Scan a file with YARA-X using a rule pack published by YARA-Forge.

    The rule pack is looked up in the latest YARA-Forge GitHub release
    (``RULES_URL``) and stored under ``BASE_RULES_LOCATION/<rule_set>``.
    Downloading, unzipping and version tracking are delegated to the
    ``_download_rules``, ``_unzip`` and ``_check_if_latest_version`` helpers
    inherited from the base classes (presumably ``RulesUtiliyMixin``).
    """

    # Name of the YARA-Forge pack to scan with (analyzer parameter).
    rule_set: str = "core"

    # Packs accepted by run().
    _RULE_SETS = ("core", "extended", "full")
    # Seconds to wait for the GitHub API so a stalled connection cannot hang
    # the analyzer indefinitely.
    _REQUEST_TIMEOUT = 30

    @classmethod
    def _fetch_latest_release(cls) -> dict:
        """Return the decoded JSON description of the latest YARA-Forge release.

        Raises:
            requests.RequestException: on network failure, timeout or a
                non-success HTTP status (e.g. GitHub rate limiting).
            ValueError: if the response body is not valid JSON.
        """
        response = requests.get(RULES_URL, timeout=cls._REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _serialize_rule(rule) -> dict:
        """Convert one matching YARA-X rule into a JSON-friendly dict."""
        logger.info(f"Rule Identifier: {rule.identifier}")
        return {
            "rule_identifier": rule.identifier,
            "rule_metadata": dict(rule.metadata),
            "pattern_details": [
                {
                    "pattern_identifier": pattern.identifier,
                    "match_details": [
                        {
                            "match_offset": match.offset,
                            "match_length": match.length,
                            "match_xor_key": match.xor_key,
                        }
                        for match in pattern.matches
                    ],
                }
                for pattern in rule.patterns
            ],
        }

    def get_rule_location(self):
        """Return the path (as ``str``) of the first ``*.yar`` file of the rule set.

        Raises:
            AnalyzerRunException: if no ``*.yar`` file exists below the rule
                set directory.
        """
        logger.info(f"Searching for rules at {BASE_RULES_LOCATION}/{self.rule_set}")
        rule_set_dir = pathlib.Path(BASE_RULES_LOCATION) / self.rule_set
        try:
            return str(next(rule_set_dir.rglob("*.yar")))
        except StopIteration as e:
            logger.error(f"{self.rule_set} not found, function exited with error {e}")
            raise AnalyzerRunException(f"{self.rule_set} rules not present") from e

    @classmethod
    def update(cls, rule_set, analyzer_module) -> bool:
        """Download and unpack the latest release of ``rule_set``.

        Args:
            rule_set: name of the YARA-Forge pack; it is matched as a
                substring of each release asset's download URL.
            analyzer_module: forwarded unchanged to ``_download_rules``.

        Returns:
            ``True`` when the rules were downloaded and unzipped.

        Raises:
            AnalyzerRunException: on any failure, including a release that
                contains no asset for ``rule_set``.
        """
        logger.info(f"Updating {rule_set} rule set")
        try:
            release = cls._fetch_latest_release()
            latest_version = release["tag_name"]
            asset = next(
                (
                    candidate
                    for candidate in release["assets"]
                    if rule_set in candidate["browser_download_url"]
                ),
                None,
            )
            if asset is None:
                # Without this guard the download would be attempted with an
                # empty URL and an empty file name.
                raise AnalyzerRunException(
                    f"No release asset found for {rule_set} rule set"
                )

            rule_set_download_url = asset["browser_download_url"]
            filename = asset["name"]
            rule_set_directory = f"{BASE_RULES_LOCATION}/{rule_set}"
            rule_file_path = f"{rule_set_directory}/{filename}"

            cls._download_rules(
                rule_set_download_url,
                rule_set_directory,
                rule_file_path,
                latest_version,
                analyzer_module,
            )
            cls._unzip(pathlib.Path(BASE_RULES_LOCATION) / rule_set / filename)

            logger.info(f"Successfully updated {rule_set} rules")
            return True

        except Exception as e:
            logger.exception(f"Failed to update yara-forge rules. Error: {e}")
            raise AnalyzerRunException(
                f"Failed to update yara-forge ruleset. Error: {e}"
            ) from e

    def run(self):
        """Refresh the rules if needed, then scan ``self.filepath``.

        Returns:
            ``{"results": [...]}`` with one entry per matching rule, or
            ``{"results": "No Match"}`` when nothing matched.

        Raises:
            AnalyzerRunException: for an unknown rule set, rules that can be
                neither found nor downloaded, or a compile/scan/timeout error.
        """
        if self.rule_set not in self._RULE_SETS:
            raise AnalyzerRunException(
                "Please select the correct ruleset pack from available options."
                " Available options are core, extended, full"
            )

        rule_dir = f"{BASE_RULES_LOCATION}/{self.rule_set}"

        try:
            latest_version = self._fetch_latest_release()["tag_name"]
        except (requests.RequestException, KeyError, ValueError) as e:
            # GitHub unreachable or rate limited: keep scanning with the rules
            # already on disk instead of failing the whole analysis.
            if not os.path.isdir(rule_dir):
                logger.error(
                    f"Couldn't fetch latest yara-forge release, error: {e}"
                )
                raise AnalyzerRunException(
                    f"Couldn't update {self.rule_set} rules"
                ) from e
            logger.warning(
                f"Couldn't fetch latest yara-forge release, "
                f"using existing {self.rule_set} rules. Error: {e}"
            )
        else:
            is_latest = self._check_if_latest_version(
                latest_version, self.python_module
            )
            # The version check is not given the rule set, so also download
            # when this particular pack is missing on disk.
            if not is_latest or not os.path.isdir(rule_dir):
                self.update(self.rule_set, self.python_module)

        rules_file_path = self.get_rule_location()
        logger.info(f"Found rules at {rules_file_path}")

        with open(rules_file_path, mode="r", encoding="utf-8") as f:
            rules_source = f.read()

        try:
            logger.info(f"Compiling rules present at {self.rule_set}")
            compiler = yara_x.Compiler()
            compiler.add_source(rules_source, origin=rules_file_path)
            rules = compiler.build()
            logger.info("Successfully compiled and built rules")

            logger.info(
                f"Starting scanning file: {self.filename} having hash: {self.md5} with {self.rule_set} rules"
            )
            scanner = yara_x.Scanner(rules)
            scan_results = scanner.scan_file(self.filepath)
            result = [
                self._serialize_rule(rule) for rule in scan_results.matching_rules
            ]

            logger.info(f"Successfully scanned {self.filename} with hash {self.md5}")

            return {"results": result or "No Match"}

        except yara_x.CompileError as e:
            logger.error(
                f"Failed to compile {self.rule_set} rules present at {rules_file_path} with error {e}"
            )
            raise AnalyzerRunException(
                f"Failed to compile {self.rule_set} rules"
            ) from e

        except yara_x.ScanError as e:
            logger.error(f"Failed to scan file {self.filename} with error {e}")
            raise AnalyzerRunException(f"Failed to scan {self.filename}") from e

        except yara_x.TimeoutError as e:
            logger.error(f"Failed with timeout with error {e}")
            raise AnalyzerRunException("Failed with timeout") from e
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
from django.db import migrations
2+
from django.db.models.fields.related_descriptors import (
3+
ForwardManyToOneDescriptor,
4+
ForwardOneToOneDescriptor,
5+
ManyToManyDescriptor,
6+
ReverseManyToOneDescriptor,
7+
ReverseOneToOneDescriptor,
8+
)
9+
10+
# Serialized AnalyzerConfig row for the YaraX analyzer.  The "model" entry is
# not a field: it names the Django model ("<app_label>.<ModelName>") that the
# migration functions resolve through the app registry.
plugin = {
    "python_module": {
        "health_check_schedule": None,
        "update_schedule": None,
        "module": "yarax.YaraX",
        "base_path": "api_app.analyzers_manager.file_analyzers",
    },
    "name": "YaraX",
    "description": (
        "[YaraX](https://virustotal.github.io/yara-x/docs/intro/getting-started/)"
        " is a re-incarnation of YARA, a pattern matching tool designed with"
        " malware researchers in mind. This new incarnation intends to be faster,"
        " safer and more user-friendly than its predecessor."
    ),
    "disabled": False,
    "soft_time_limit": 480,
    "routing_key": "default",
    "health_check_status": True,
    "type": "file",
    "docker_based": False,
    "maximum_tlp": "RED",
    "observable_supported": [],
    "supported_filetypes": [],
    "run_hash": False,
    "run_hash_type": "",
    "not_supported_filetypes": ["application/vnd.tcpdump.pcap"],
    "mapping_data_model": {},
    "model": "analyzers_manager.AnalyzerConfig",
}
34+
35+
# Serialized Parameter rows declared by the YaraX python module.
params = [
    {
        "python_module": {
            "module": "yarax.YaraX",
            "base_path": "api_app.analyzers_manager.file_analyzers",
        },
        "name": "rule_set",
        "type": "str",
        "description": (
            "Yara-Forge Ruleset pack to use. Available options are core, extended,"
            " full. By default, core ruleset pack is selected for analysis."
            " Refer the docs for more information."
        ),
        "is_secret": False,
        "required": False,
    }
]
48+
49+
# Serialized PluginConfig rows: the default value ("core") of the "rule_set"
# parameter for the YaraX analyzer configuration.
values = [
    {
        "parameter": {
            "python_module": {
                "module": "yarax.YaraX",
                "base_path": "api_app.analyzers_manager.file_analyzers",
            },
            "name": "rule_set",
            "type": "str",
            "description": (
                "Yara-Forge Ruleset pack to use. Available options are core,"
                " extended, full. By default, core ruleset pack is selected for"
                " analysis. Refer the docs for more information."
            ),
            "is_secret": False,
            "required": False,
        },
        "analyzer_config": "YaraX",
        "connector_config": None,
        "visualizer_config": None,
        "ingestor_config": None,
        "pivot_config": None,
        "for_organization": False,
        "value": "core",
        "updated_at": "2025-09-07T11:44:16.043896Z",
        "owner": None,
    }
]
73+
74+
75+
def _get_real_obj(Model, field, value):
    """Turn a serialized relation value into the model instance(s) it refers to.

    Values of non-relational fields, and falsy values, are returned unchanged.
    For a single-valued relation the related object is returned; for a
    many-to-many relation a list of related objects is returned.
    """

    def _get_obj(Model, other_model, value):
        # A dict is a nested serialized object: resolve each of its fields
        # recursively, then fetch or create the row.
        if isinstance(value, dict):
            resolved = {
                key: _get_real_obj(other_model, key, nested)
                for key, nested in value.items()
            }
            return other_model.objects.get_or_create(**resolved)[0]
        # Anything that is not an int is looked up by name.
        if not isinstance(value, int):
            return other_model.objects.get(name=value)
        # it is just the primary key serialized
        if Model.__name__ == "PluginConfig":
            return other_model.objects.get(name=plugin["name"])
        return other_model.objects.get(pk=value)

    descriptor = getattr(Model, field)
    descriptor_type = type(descriptor)
    if not value:
        return value

    single_valued = (
        ForwardManyToOneDescriptor,
        ReverseManyToOneDescriptor,
        ReverseOneToOneDescriptor,
        ForwardOneToOneDescriptor,
    )
    if descriptor_type in single_valued:
        return _get_obj(Model, descriptor.get_queryset().model, value)
    if descriptor_type is ManyToManyDescriptor:
        related_model = descriptor.rel.model
        return [_get_obj(Model, related_model, item) for item in value]
    return value
109+
110+
111+
def _create_object(Model, data):
    """Create a ``Model`` row from serialized ``data`` unless it already exists.

    Returns ``True`` when a row matching every non many-to-many field was
    already present, ``False`` when a new row was validated, saved and had its
    many-to-many relations set.
    """
    plain_fields = {}
    many_to_many = {}
    for name, raw in data.items():
        resolved = _get_real_obj(Model, name, raw)
        is_m2m = type(getattr(Model, name)) is ManyToManyDescriptor
        bucket = many_to_many if is_m2m else plain_fields
        bucket[name] = resolved

    try:
        Model.objects.get(**plain_fields)
    except Model.DoesNotExist:
        instance = Model(**plain_fields)
        instance.full_clean()
        instance.save()
        # Many-to-many relations can only be set once the row is saved.
        for name, related in many_to_many.items():
            manager = getattr(instance, name)
            if related is not None:
                manager.set(related)
        return False
    return True
131+
132+
133+
def migrate(apps, schema_editor):
    """Create the YaraX plugin row, then its parameters and default values.

    Nothing is created when a plugin with the same name already exists.
    The module-level ``plugin`` dict is left untouched so that this function
    and ``reverse_migrate`` can both run in the same process.
    """
    Parameter = apps.get_model("api_app", "Parameter")
    PluginConfig = apps.get_model("api_app", "PluginConfig")
    # "model" is routing information, not a model field: strip it from a copy
    # instead of popping it from the shared module-level dict.
    plugin_data = dict(plugin)
    python_path = plugin_data.pop("model")
    Model = apps.get_model(*python_path.split("."))
    if not Model.objects.filter(name=plugin["name"]).exists():
        exists = _create_object(Model, plugin_data)
        if not exists:
            for param in params:
                _create_object(Parameter, param)
            for value in values:
                _create_object(PluginConfig, value)


def reverse_migrate(apps, schema_editor):
    """Delete the YaraX plugin row created by ``migrate``.

    Reads ``plugin["model"]`` without removing it, so the function can be
    called repeatedly and in any order relative to ``migrate``.
    """
    python_path = plugin["model"]
    Model = apps.get_model(*python_path.split("."))
    Model.objects.get(name=plugin["name"]).delete()
151+
152+
153+
class Migration(migrations.Migration):
    """Data migration that registers the YaraX analyzer configuration."""

    # Do not wrap the whole migration in a single transaction.
    atomic = False

    dependencies = [
        ("api_app", "0071_delete_last_elastic_report"),
        ("analyzers_manager", "0165_analyzer_config_joesandboxurl"),
    ]

    operations = [
        migrations.RunPython(code=migrate, reverse_code=reverse_migrate),
    ]
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Generated by Django 4.2.17 on 2025-09-07 12:15
2+
3+
import django.db.models.deletion
4+
from django.db import migrations, models
5+
6+
7+
class Migration(migrations.Migration):
    """Schema migration creating the ``AnalyzerRulesFileVersion`` table."""

    dependencies = [
        ("api_app", "0071_delete_last_elastic_report"),
        ("analyzers_manager", "0166_analyzer_config_yarax"),
    ]

    operations = [
        migrations.CreateModel(
            name="AnalyzerRulesFileVersion",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "last_downloaded_version",
                    models.CharField(blank=True, default="", max_length=50),
                ),
                ("download_url", models.URLField(blank=True, default="")),
                ("downloaded_at", models.DateTimeField(auto_now_add=True)),
                (
                    "python_module",
                    models.ForeignKey(
                        # Same object as django.db.models.deletion.PROTECT.
                        on_delete=models.PROTECT,
                        related_name="rules_version",
                        to="api_app.pythonmodule",
                    ),
                ),
            ],
        ),
    ]

api_app/analyzers_manager/models.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,3 +350,13 @@ def plugin_type(cls) -> str:
350350
# Returns the AnalyzerConfigurationException class (the class itself, not an instance).
@property
351351
def config_exception(cls):
352352
return AnalyzerConfigurationException
353+
354+
355+
class AnalyzerRulesFileVersion(models.Model):
    """Bookkeeping row describing a rules-file download for a python module."""

    # Version label of the downloaded rules; empty string when not set.
    last_downloaded_version = models.CharField(
        max_length=50,
        blank=True,
        default="",
    )
    # URL the rules were downloaded from; empty string when not set.
    download_url = models.URLField(
        max_length=200,
        blank=True,
        default="",
    )
    # Filled in automatically when the row is first created.
    downloaded_at = models.DateTimeField(auto_now_add=True)

    # PROTECT: the referenced python module cannot be deleted while a row
    # still points at it.
    python_module = models.ForeignKey(
        to=PythonModule,
        on_delete=models.PROTECT,
        related_name="rules_version",
    )

0 commit comments

Comments
 (0)