Skip to content

Commit fdacc7c

Browse files
authored
Engine + Analyzable (#2685)
* engine * More stuff * More engine * More engine * Removed old code * Blake * Fixes * Fixes * Fixes * Fixes * Fix * Fix merge with dict * Fix * More tests * Added another engine * Analyzable * Blake * Fixes * Fixes * Fixes * Added error * Added delete * Fixes * Fixes * Blake * Fix * More fixes * Fixes * Fixes * update * Fixes * Fixes * Fix typo * More fixes * More fixes * Fixed files * Update deepsource * Fixes * Typo * Fixes * Fixes * Fixes * Fixes * Blake * Fix * Fix * Typo
1 parent 0630004 commit fdacc7c

File tree

189 files changed

+3028
-1164
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

189 files changed

+3028
-1164
lines changed

api_app/admin.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,8 @@ class JobAdminView(CustomAdminView):
4949
"id",
5050
"status",
5151
"user",
52-
"observable_name",
53-
"observable_classification",
54-
"file_name",
55-
"file_mimetype",
52+
"get_analyzable_name",
53+
"get_analyzable_classification",
5654
"received_request_time",
5755
"analyzers_executed",
5856
"connectors_executed",
@@ -64,13 +62,16 @@ class JobAdminView(CustomAdminView):
6462
"user",
6563
"status",
6664
)
67-
search_fields = (
68-
"md5",
69-
"observable_name",
70-
"file_name",
71-
)
7265
list_filter = ("status", "user", "tags")
7366

67+
@admin.display(description="Name")
def get_analyzable_name(self, instance):
    """Return the name of the analyzable attached to ``instance``.

    Used as a column of the admin change list, labelled "Name".
    """
    analyzable = instance.analyzable
    return analyzable.name
70+
71+
@admin.display(description="Classification")
def get_analyzable_classification(self, instance):
    """Return the classification of the analyzable attached to ``instance``.

    Used as a column of the admin change list, labelled "Classification".
    """
    analyzable = instance.analyzable
    return analyzable.classification
74+
7475
@staticmethod
def has_add_permission(request: HttpRequest) -> bool:
    """Report that adding objects is never permitted.

    :param request: the current HTTP request; it is not inspected.
    :return: always ``False``.
    """
    # The answer does not depend on the request or on the user behind it.
    return False

api_app/analyzables_manager/__init__.py

Whitespace-only changes.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from django.contrib import admin
2+
3+
from api_app.analyzables_manager.models import Analyzable
4+
5+
6+
@admin.register(Analyzable)
class AnalyzableAdmin(admin.ModelAdmin):
    """Django admin configuration registered for the ``Analyzable`` model."""

    # Columns shown in the change list.
    list_display = [
        "pk",
        "name",
        "sha1",
        "sha256",
        "md5",
    ]
    # Fields matched by the admin search box.
    search_fields = [
        "name",
        "sha1",
        "sha256",
        "md5",
    ]
    # Default ordering of the change list.
    ordering = ["name"]
    # Sidebar filters.
    list_filter = ["discovery_date"]

api_app/analyzables_manager/apps.py

Whitespace-only changes.
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Generated by Django 4.2.17 on 2025-01-22 08:59
2+
3+
from django.db import migrations, models
4+
from django.utils.timezone import now
5+
6+
import api_app.defaults
7+
8+
9+
class Migration(migrations.Migration):
    """Initial migration: creates the ``Analyzable`` table."""

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name="Analyzable",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                # The three digests are unique and not editable.
                (
                    "md5",
                    models.CharField(max_length=255, unique=True, editable=False),
                ),
                (
                    "sha256",
                    models.CharField(max_length=255, unique=True, editable=False),
                ),
                (
                    "sha1",
                    models.CharField(max_length=255, unique=True, editable=False),
                ),
                (
                    "name",
                    models.CharField(max_length=255),
                ),
                # mimetype and file are optional and default to None.
                (
                    "mimetype",
                    models.CharField(
                        blank=True, max_length=80, null=True, default=None
                    ),
                ),
                (
                    "file",
                    models.FileField(
                        null=True,
                        default=None,
                        blank=True,
                        upload_to=api_app.defaults.file_directory_path,
                    ),
                ),
                (
                    "classification",
                    models.CharField(
                        max_length=100,
                        choices=[
                            ("ip", "Ip"),
                            ("url", "Url"),
                            ("domain", "Domain"),
                            ("hash", "Hash"),
                            ("generic", "Generic"),
                            ("file", "File"),
                        ],
                    ),
                ),
                (
                    "discovery_date",
                    models.DateTimeField(default=now),
                ),
            ],
            options={
                "abstract": False,
            },
        ),
    ]
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import hashlib
2+
3+
from django.db import migrations
4+
from django.db.models import F, OuterRef, Subquery, Window
5+
from django.db.models.functions import RowNumber
6+
7+
8+
def calculate_sha1(value: bytes) -> str:
    """Return the hexadecimal SHA-1 digest of ``value``."""
    digest = hashlib.sha1()  # skipcq BAN-B324
    digest.update(value)
    return digest.hexdigest()
10+
11+
12+
def calculate_sha256(value: bytes) -> str:
    """Return the hexadecimal SHA-256 digest of ``value``."""
    digest = hashlib.sha256()  # skipcq BAN-B324
    digest.update(value)
    return digest.hexdigest()
14+
15+
16+
def migrate(apps, schema_editor):
    """Create one ``Analyzable`` per distinct job md5 and link every job to it.

    For each md5 only the job with the earliest ``received_request_time`` is
    used as the source of the analyzable:

    * sample jobs produce a ``file`` analyzable; the stored file is renamed to
      the job md5 (best effort) and, when the rename succeeds, sha1/sha256 are
      recomputed from the file content;
    * every other job produces an analyzable named after the observable, with
      sha1/sha256 computed from the UTF-8 encoded name.

    Finally every job gets its ``analyzable`` set to the row with the same md5.

    :param apps: historical app registry supplied by the migration framework.
    :param schema_editor: schema editor supplied by the migration framework
        (unused).
    """
    # Local import: keeps this function self-contained with respect to the
    # module-level import block.
    from pathlib import Path, PurePosixPath

    Job = apps.get_model("api_app", "Job")
    Analyzable = apps.get_model("analyzables_manager", "Analyzable")
    # keep only one job per md5: the first one by received_request_time
    jobs = Job.objects.alias(
        row_number=Window(
            RowNumber(), partition_by=(F("md5"),), order_by="received_request_time"
        )
    ).filter(row_number=1)
    for job in jobs:
        if job.is_sample:
            an = Analyzable.objects.create(
                md5=job.md5,
                sha256=job.sha256,
                sha1=job.sha1,
                file=job.file,
                mimetype=job.file_mimetype,
                name=job.file_name,
                # "file" is the value listed in the classification choices of
                # the Analyzable model; "sample" is not one of them.
                classification="file",
                discovery_date=job.received_request_time,
            )

            # FieldFile.path is a plain string: wrap it in a Path, otherwise
            # .rename()/.parent raise AttributeError and the rename never runs.
            source = Path(job.file.path)
            target = source.parent / job.md5
            try:
                source.rename(target)
            except OSError:
                # Best effort: keep the original file name and the hashes
                # copied from the job.
                print(f"Error: unable to rename {job}")
            else:
                # The file stays in the same directory, so only the last
                # component of the stored (storage-relative) name changes.
                renamed = str(PurePosixPath(job.file.name).with_name(job.md5))
                job.file.name = renamed
                an.file.name = renamed
                # Read from the new location: the old path no longer exists.
                content = target.read_bytes()
                an.sha1 = calculate_sha1(content)
                an.sha256 = calculate_sha256(content)
        else:
            an = Analyzable.objects.create(
                md5=job.md5,
                name=job.observable_name,
                classification=job.observable_classification,
                discovery_date=job.received_request_time,
            )
            encoded_name = an.name.encode("utf-8")
            an.sha1 = calculate_sha1(encoded_name)
            an.sha256 = calculate_sha256(encoded_name)
        # Persist the hashes (and the file name, if changed) before the next
        # row is created: sha1 and sha256 are unique columns.
        an.save()
    # Link all jobs, including those skipped above, through their md5.
    Job.objects.update(
        analyzable=Subquery(
            Analyzable.objects.filter(md5=OuterRef("md5")).values("pk")[:1]
        )
    )
65+
66+
67+
class Migration(migrations.Migration):
    """Data migration: runs ``migrate`` forwards and does nothing backwards."""

    dependencies = [
        ("contenttypes", "0002_remove_content_type_name"),
        ("api_app", "0067_add_analyzable"),
        ("analyzables_manager", "0001_initial"),
        ("visualizers_manager", "0040_visualizer_config_data_model"),
    ]

    operations = [
        migrations.RunPython(
            code=migrate,
            # Reversing is a no-op: nothing is undone.
            reverse_code=migrations.RunPython.noop,
        ),
    ]
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Generated by Django 4.2.17 on 2025-01-23 14:38
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """Add single-column indexes on ``classification`` and ``mimetype``."""

    dependencies = [
        ("analyzables_manager", "0002_migrate_data"),
    ]

    operations = [
        migrations.AddIndex(
            model_name="analyzable",
            index=models.Index(
                name="analyzables_classif_adf7ca_idx",
                fields=["classification"],
            ),
        ),
        migrations.AddIndex(
            model_name="analyzable",
            index=models.Index(
                name="analyzables_mimetyp_321d7d_idx",
                fields=["mimetype"],
            ),
        ),
    ]

api_app/analyzables_manager/migrations/__init__.py

Whitespace-only changes.
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
from typing import Type, Union
2+
3+
from django.core.exceptions import ValidationError
4+
from django.db import models
5+
from django.utils.timezone import now
6+
7+
from api_app.analyzables_manager.queryset import AnalyzableQuerySet
8+
from api_app.choices import Classification
9+
from api_app.data_model_manager.models import (
10+
BaseDataModel,
11+
DomainDataModel,
12+
FileDataModel,
13+
IPDataModel,
14+
)
15+
from api_app.defaults import file_directory_path
16+
from api_app.helpers import calculate_md5, calculate_sha1, calculate_sha256
17+
18+
19+
class Analyzable(models.Model):
    """Something that can be analyzed: a file sample or a named observable.

    An analyzable whose classification is ``Classification.FILE`` carries a
    file (and a mimetype); any other classification is identified by ``name``
    only. md5, sha256 and sha1 are unique and are filled in by ``clean()``
    when they are empty.
    """

    name = models.CharField(max_length=255)
    discovery_date = models.DateTimeField(default=now)
    md5 = models.CharField(max_length=255, unique=True, editable=False)
    sha256 = models.CharField(max_length=255, unique=True, editable=False)
    sha1 = models.CharField(max_length=255, unique=True, editable=False)
    classification = models.CharField(max_length=100, choices=Classification.choices)
    mimetype = models.CharField(max_length=80, blank=True, null=True, default=None)
    file = models.FileField(
        upload_to=file_directory_path, null=True, blank=True, default=None
    )

    objects = AnalyzableQuerySet.as_manager()

    class Meta:
        indexes = [
            models.Index(fields=["classification"]),
            models.Index(fields=["mimetype"]),
        ]

    def __str__(self):
        return self.name

    @property
    def analyzed_object(self):
        """The file for samples, the name for everything else."""
        if self.is_sample:
            return self.file
        return self.name

    @property
    def is_sample(self) -> bool:
        """Whether the classification is ``Classification.FILE``."""
        return self.classification == Classification.FILE.value

    def get_data_model_class(self) -> Type[BaseDataModel]:
        """Return the data model class matching the classification.

        :raises NotImplementedError: for classifications other than ip, url,
            domain, hash and file.
        """
        if self.classification == Classification.IP.value:
            return IPDataModel
        if self.classification in (
            Classification.URL.value,
            Classification.DOMAIN.value,
        ):
            return DomainDataModel
        if self.classification in (
            Classification.HASH.value,
            Classification.FILE.value,
        ):
            return FileDataModel
        raise NotImplementedError()

    def _set_hashes(self, value: Union[str, bytes]):
        """Fill md5, sha256 and sha1 from ``value``, skipping those already set.

        :param value: content to hash; strings are encoded as UTF-8 first.
        """
        payload = value.encode("utf-8") if isinstance(value, str) else value
        hashers = (
            ("md5", calculate_md5),
            ("sha256", calculate_sha256),
            ("sha1", calculate_sha1),
        )
        for attribute, hasher in hashers:
            # A digest that is already present is never overwritten.
            if not getattr(self, attribute):
                setattr(self, attribute, hasher(payload))

    def clean(self):
        """Validate the file/mimetype combination and fill the missing hashes.

        :raises ValidationError: when a sample has no file, or when an
            observable has a mimetype or a file.
        """
        if self.classification != Classification.FILE.value:
            # Observables are hashed on their name and carry no file data.
            if self.mimetype or self.file:
                raise ValidationError(
                    "Mimetype and file must not be set for observables"
                )
            self._set_hashes(self.name)
            return

        # Imported inside the method, as in the original code.
        from api_app.analyzers_manager.models import MimeTypes

        if not self.file:
            raise ValidationError("File must be set for samples")
        file_content = self.read()
        if not self.mimetype:
            self.mimetype = MimeTypes.calculate(file_content, self.name)
        self._set_hashes(file_content)

    def read(self) -> bytes:
        """Return the whole file content, reading from the beginning.

        Only samples are read; for any other classification the method
        returns ``None`` despite the ``bytes`` annotation.
        """
        if self.classification != Classification.FILE.value:
            return None
        self.file.seek(0)
        return self.file.read()
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import logging
2+
3+
from django.db.models import QuerySet
4+
5+
logger = logging.getLogger(__name__)
6+
7+
8+
class AnalyzableQuerySet(QuerySet):
    """QuerySet for analyzables: visibility filtering and validated creation."""

    def visible_for_user(self, user):
        """Restrict the queryset to analyzables referenced by visible jobs.

        :param user: user passed to ``Job.objects.visible_for_user``.
        :return: the analyzables whose pk is referenced by at least one job
            visible for ``user``.
        """
        # Imported here rather than at module level
        # (presumably to avoid a circular import; confirm).
        from api_app.models import Job

        analyzables = (
            Job.objects.visible_for_user(user)
            .values("analyzable")
            .distinct()
            .values_list("analyzable__pk", flat=True)
        )
        return self.filter(pk__in=analyzables)

    def create(self, *args, **kwargs):
        """Build, validate and insert a new object.

        Unlike the stock ``QuerySet.create``, the object goes through
        ``full_clean()`` before being saved.

        :param args: accepted for compatibility and ignored.
        :param kwargs: field values passed to the model constructor.
        :return: the saved object.
        :raises Exception: whatever ``full_clean()`` raises is logged and
            re-raised unchanged.
        """
        obj = self.model(**kwargs)
        self._for_write = True
        try:
            obj.full_clean()
        except Exception as e:
            # Validation can fail for reasons other than a duplicate, so the
            # actual error is logged instead of a fixed "already exists" text.
            logger.error(
                "Unable to create analyzable with md5 %s: %s", obj.md5, e
            )
            # Bare raise keeps the original traceback untouched.
            raise
        obj.save(force_insert=True, using=self.db)
        return obj

0 commit comments

Comments
 (0)