Skip to content
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,18 @@ updates:
- dependency-name: "*"
update-types: [ "version-update:semver-patch" ]

- package-ecosystem: "pip"
directory: "/integrations/nuclei_analyzer"
schedule:
interval: "weekly"
day: "tuesday"
target-branch: "develop"
ignore:
# ignore all patch updates since we are using ~=
# this does not work for security updates
- dependency-name: "*"
update-types: [ "version-update:semver-patch" ]

- package-ecosystem: "pip"
directory: "/integrations/phishing_analyzers"
schedule:
Expand Down Expand Up @@ -119,6 +131,18 @@ updates:
- dependency-name: "*"
update-types: ["version-update:semver-patch"]

- package-ecosystem: "docker"
directory: "/integrations/nuclei_analyzer"
schedule:
interval: "weekly"
day: "tuesday"
target-branch: "develop"
ignore:
# ignore all patch updates to reduce update noise
# this does not apply to security updates, which are always raised
- dependency-name: "*"
update-types: ["version-update:semver-patch"]

- package-ecosystem: "docker"
directory: "/integrations/malware_tools_analyzers"
schedule:
Expand Down
163 changes: 163 additions & 0 deletions api_app/analyzers_manager/migrations/0148_analyzer_config_nuclei.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
from django.db import migrations
from django.db.models.fields.related_descriptors import (
ForwardManyToOneDescriptor,
ForwardOneToOneDescriptor,
ManyToManyDescriptor,
ReverseManyToOneDescriptor,
ReverseOneToOneDescriptor,
)

# Serialized AnalyzerConfig for the Nuclei analyzer; the trailing "model" key
# is migration metadata (dotted app.Model path), not a model field.
plugin = {
    "python_module": {
        "health_check_schedule": None,
        "update_schedule": None,
        "module": "nuclei.NucleiAnalyzer",
        "base_path": "api_app.analyzers_manager.observable_analyzers",
    },
    "name": "Nuclei",
    "description": "[Nuclei](https://github.com/projectdiscovery/nuclei) is a fast, customizable vulnerability scanner that leverages YAML-based templates to detect, rank, and address security flaws. It operates using structured templates that define specific security checks.",
    "disabled": False,
    "soft_time_limit": 1200,
    "routing_key": "default",
    "health_check_status": True,
    "type": "observable",
    "docker_based": True,
    "maximum_tlp": "RED",
    "observable_supported": ["ip", "url"],
    "supported_filetypes": [],
    "run_hash": False,
    "run_hash_type": "",
    "not_supported_filetypes": [],
    "mapping_data_model": {},
    "model": "analyzers_manager.AnalyzerConfig",
}

# Parameter definitions exposed by the analyzer (seeded only on first creation).
params = [
    {
        "python_module": {
            "module": "nuclei.NucleiAnalyzer",
            "base_path": "api_app.analyzers_manager.observable_analyzers",
        },
        "name": "template_dirs",
        "type": "list",
        "description": "The template_dirs parameter allows you to specify a list of directories containing templates, each focusing on a particular category of vulnerabilities, exposures, or security assessments.\r\nAvailable Template Categories:\r\ncloud\r\ncode\r\ncves\r\nvulnerabilities\r\ndns\r\nfile\r\nheadless\r\nhelpers\r\nhttp\r\njavascript\r\nnetwork\r\npassive\r\nprofiles\r\nssl\r\nworkflows\r\nexposures",
        "is_secret": False,
        "required": False,
    }
]

# Default PluginConfig values for the parameters above (empty list = no
# template filtering by default).
values = [
    {
        "parameter": {
            "python_module": {
                "module": "nuclei.NucleiAnalyzer",
                "base_path": "api_app.analyzers_manager.observable_analyzers",
            },
            "name": "template_dirs",
            "type": "list",
            "description": "The template_dirs parameter allows you to specify a list of directories containing templates, each focusing on a particular category of vulnerabilities, exposures, or security assessments.\r\nAvailable Template Categories:\r\ncloud\r\ncode\r\ncves\r\nvulnerabilities\r\ndns\r\nfile\r\nheadless\r\nhelpers\r\nhttp\r\njavascript\r\nnetwork\r\npassive\r\nprofiles\r\nssl\r\nworkflows\r\nexposures",
            "is_secret": False,
            "required": False,
        },
        "analyzer_config": "Nuclei",
        "connector_config": None,
        "visualizer_config": None,
        "ingestor_config": None,
        "pivot_config": None,
        "for_organization": False,
        "value": [],
        "updated_at": "2025-01-08T08:33:45.653741Z",
        "owner": None,
    }
]


def _get_real_obj(Model, field, value):
    """
    Resolve a serialized relation value into a real related model instance.

    ``value`` may be a dict of lookup fields (resolved recursively, then
    get_or_create'd), an int primary key, or a natural-key name string.
    Values for non-relational fields are returned unchanged.
    """

    def _get_obj(Model, other_model, value):
        if isinstance(value, dict):
            # nested serialized object: resolve each field, then get_or_create
            real_vals = {}
            for key, real_val in value.items():
                real_vals[key] = _get_real_obj(other_model, key, real_val)
            value = other_model.objects.get_or_create(**real_vals)[0]
        # it is just the primary key serialized
        else:
            if isinstance(value, int):
                if Model.__name__ == "PluginConfig":
                    # PluginConfig rows reference their plugin by name, not pk
                    value = other_model.objects.get(name=plugin["name"])
                else:
                    value = other_model.objects.get(pk=value)
            else:
                # natural key: look the related object up by its unique name
                value = other_model.objects.get(name=value)
        return value

    if (
        type(getattr(Model, field))
        in [
            ForwardManyToOneDescriptor,
            ReverseManyToOneDescriptor,
            ReverseOneToOneDescriptor,
            ForwardOneToOneDescriptor,
        ]
        and value
    ):
        # single-valued relation: resolve the one serialized reference
        other_model = getattr(Model, field).get_queryset().model
        value = _get_obj(Model, other_model, value)
    elif type(getattr(Model, field)) in [ManyToManyDescriptor] and value:
        # many-to-many relation: resolve each serialized reference in the list
        other_model = getattr(Model, field).rel.model
        value = [_get_obj(Model, other_model, val) for val in value]
    return value


def _create_object(Model, data):
    """
    Idempotently create a ``Model`` row from serialized ``data``.

    Returns ``True`` if a matching object already existed and ``False`` if a
    new one was created — note the inverted-looking convention: callers use
    the return value as an "already exists" flag.
    """
    mtm, no_mtm = {}, {}
    for field, value in data.items():
        value = _get_real_obj(Model, field, value)
        # m2m values can only be assigned after the instance has a pk, so
        # split them out from the regular fields
        if type(getattr(Model, field)) is ManyToManyDescriptor:
            mtm[field] = value
        else:
            no_mtm[field] = value
    try:
        o = Model.objects.get(**no_mtm)
    except Model.DoesNotExist:
        o = Model(**no_mtm)
        o.full_clean()
        o.save()
        for field, value in mtm.items():
            attribute = getattr(o, field)
            if value is not None:
                attribute.set(value)
        return False
    return True


def migrate(apps, schema_editor):
    """
    Forward migration: create the Nuclei AnalyzerConfig together with its
    Parameter definitions and default PluginConfig values, if missing.
    """
    Parameter = apps.get_model("api_app", "Parameter")
    PluginConfig = apps.get_model("api_app", "PluginConfig")
    # "model" is migration metadata, not a model field: pop it so the
    # remaining dict maps 1:1 onto AnalyzerConfig fields in _create_object
    python_path = plugin.pop("model")
    Model = apps.get_model(*python_path.split("."))
    if not Model.objects.filter(name=plugin["name"]).exists():
        exists = _create_object(Model, plugin)
        # only seed parameters/values when the config was just created
        if not exists:
            for param in params:
                _create_object(Parameter, param)
            for value in values:
                _create_object(PluginConfig, value)


def reverse_migrate(apps, schema_editor):
    """
    Reverse migration: delete the Nuclei AnalyzerConfig created by
    :func:`migrate` (related Parameter/PluginConfig rows cascade in Django).
    """
    # Read "model" non-destructively: migrate() pops the key from the shared
    # module-level dict, so a second pop() here would raise KeyError if both
    # directions ran in the same process. Fall back to the known path.
    python_path = plugin.get("model", "analyzers_manager.AnalyzerConfig")
    Model = apps.get_model(*python_path.split("."))
    # filter().delete() is a no-op when the row is already gone, making the
    # reverse migration idempotent instead of raising Model.DoesNotExist.
    Model.objects.filter(name=plugin["name"]).delete()


class Migration(migrations.Migration):
    # data migration touching several tables; run outside a single transaction
    atomic = False
    dependencies = [
        ("api_app", "0065_job_mpnodesearch"),
        (
            "analyzers_manager",
            "0147_alter_analyzer_config_feodo_yaraify_urlhaus_yaraify_scan",
        ),
    ]

    operations = [migrations.RunPython(migrate, reverse_migrate)]
56 changes: 56 additions & 0 deletions api_app/analyzers_manager/observable_analyzers/nuclei.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
# See the file 'LICENSE' for copying permission.
import logging

from api_app.analyzers_manager.classes import DockerBasedAnalyzer, ObservableAnalyzer


class NucleiAnalyzer(ObservableAnalyzer, DockerBasedAnalyzer):
    """
    Run a Nuclei vulnerability scan against an IP or URL observable through
    the ``nuclei_analyzer`` docker container's HTTP API.
    """

    # endpoint exposed by integrations/nuclei_analyzer/app.py
    url: str = "http://nuclei_analyzer:4008/run-nuclei"
    # user-configurable list of template categories to scan with
    template_dirs: list
    # polling configuration for the docker-based job (40 tries * 30s)
    max_tries: int = 40
    poll_distance: int = 30

    @classmethod
    def update(cls) -> bool:
        # template updates are handled inside the docker image; nothing to do
        pass

    def run(self):
        """
        Prepares and executes a Nuclei scan through the Docker-based API.

        Invalid entries in ``template_dirs`` are skipped: a warning is logged
        and recorded in the report errors instead of silently printed.
        """
        # template categories shipped with nuclei; anything else is rejected
        valid_template_categories = frozenset(
            {
                "cloud",
                "code",
                "cves",
                "vulnerabilities",
                "dns",
                "file",
                "headless",
                "helpers",
                "http",
                "javascript",
                "network",
                "passive",
                "profiles",
                "ssl",
                "workflows",
                "exposures",
            }
        )

        args = [self.observable_name]

        # Append valid template directories with the "-t" flag
        for template_dir in self.template_dirs:
            if template_dir in valid_template_categories:
                args.extend(["-t", template_dir])
            else:
                warning = (
                    f"Skipping invalid template directory '{template_dir}' "
                    f"while analyzing observable {self.observable_name}"
                )
                logging.getLogger(__name__).warning(warning)
                # surface the skipped entry to the user in the job report
                self.report.errors.append(warning)

        req_data = {"args": args}

        # Execute the request against the docker container
        response = self._docker_run(req_data=req_data, req_files=None)

        # app.py repackages the newline-delimited nuclei findings under "data"
        return response.get("data", [])
41 changes: 41 additions & 0 deletions integrations/nuclei_analyzer/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
FROM projectdiscovery/nuclei:v3.3.8

ENV LOG_PATH=/var/log/intel_owl/nuclei_analyzer
ENV USER=nuclei-user
ENV PROJECT_PATH=/app

# Create non-root user
RUN adduser -D -h /home/${USER} ${USER}

# Install required packages using apk and clean cache in the same layer
RUN apk add --no-cache python3 py3-pip \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please select a specific python version here. The more recent the better.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated

&& rm -rf /var/cache/apk/* \
&& pip3 install --no-cache-dir --upgrade pip

# Create working directory and set ownership
WORKDIR /app

# Copy and install requirements first (better layer caching)
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt \
&& rm -rf ~/.cache/pip/*

# Create log directory with proper permissions
RUN mkdir -p ${LOG_PATH} \
&& touch ${LOG_PATH}/gunicorn_access.log ${LOG_PATH}/gunicorn_errors.log \
&& chown -R ${USER}:${USER} ${LOG_PATH} \
&& chmod 755 ${LOG_PATH} \
&& chmod 666 ${LOG_PATH}/gunicorn_access.log \
&& chmod 666 ${LOG_PATH}/gunicorn_errors.log
# Copy application files
COPY app.py .
COPY entrypoint.sh /entrypoint.sh

# Set proper permissions
RUN chown -R ${USER}:${USER} /app \
&& chmod +x /entrypoint.sh

# Expose the API port
EXPOSE 4008

ENTRYPOINT ["/entrypoint.sh"]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you please add an HEALTHCHECK here as an additional check?

You can check the Dockerfile_nginx as an example. You can check whether the gunicorn app is up or not after a predefined period that should be the expected time of execution of the entrypoint.sh script

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the health check, I have added an extra endpoint to the API which is being called from the dockerfile.

77 changes: 77 additions & 0 deletions integrations/nuclei_analyzer/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import json
import logging
import os

from flask import Flask
from flask_executor import Executor
from flask_shell2http import Shell2HTTP

# Logger configuration: attach to flask_shell2http's logger so command
# execution logs from the library end up in our files too
LOG_NAME = "nuclei_scanner"
logger = logging.getLogger("flask_shell2http")

# Create formatter
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

# Set log level from environment variable or default to INFO
log_level = os.getenv("LOG_LEVEL", logging.INFO)
# NOTE(review): the container sets LOG_PATH=/var/log/intel_owl/nuclei_analyzer;
# the fallback below uses LOG_NAME ("nuclei_scanner") instead — confirm intended
log_path = os.getenv("LOG_PATH", f"/var/log/intel_owl/{LOG_NAME}")

# Create file handlers for both general logs and errors
fh = logging.FileHandler(f"{log_path}/{LOG_NAME}.log")
fh.setFormatter(formatter)
fh.setLevel(log_level)

# errors-only handler so failures are easy to find in a dedicated file
fh_err = logging.FileHandler(f"{log_path}/{LOG_NAME}_errors.log")
fh_err.setFormatter(formatter)
fh_err.setLevel(logging.ERROR)

# Add handlers to logger
logger.addHandler(fh)
logger.addHandler(fh_err)
logger.setLevel(log_level)

# Flask application instance with secret key
# NOTE: the os.urandom fallback generates a new key on every restart,
# invalidating existing sessions; set SECRET_KEY in production
app = Flask(__name__)
app.config["SECRET_KEY"] = os.getenv("SECRET_KEY", os.urandom(24).hex())

# Initialize the Executor for background task processing
executor = Executor(app)

# Initialize the Shell2HTTP for exposing shell commands as HTTP endpoints
shell2http = Shell2HTTP(app=app, executor=executor)


def my_callback_fn(context, future):
    """
    Callback invoked by flask_shell2http when a Nuclei scan finishes.

    Nuclei's ``-j`` output is NOT one JSON document: it emits one JSON object
    per finding, separated by newlines (NDJSON), possibly interleaved with
    human-readable status lines. We therefore parse the report line by line,
    skip non-JSON lines, and repackage the findings as a list under
    ``result["report"]["data"]`` so the analyzer receives valid JSON.
    """
    log = logging.getLogger("flask_shell2http")
    try:
        result = future.result()
        report = result["report"]
        # The report is a string with multiple JSON objects separated by newlines
        json_objects = []
        for line in report.strip().split("\n"):
            try:
                json_objects.append(json.loads(line))
            except json.JSONDecodeError:
                # status/progress line, not a finding: warn instead of print
                log.warning(f"Skipping non-JSON line: {line}")
        result["report"] = {"data": json_objects}
        log.info(f"Nuclei scan completed for context: {context}")
        log.debug(f"Scan result: {result}")
    except Exception as e:
        log.error(f"Error in callback function: {str(e)}", exc_info=True)
        raise
Comment on lines +51 to +69
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is not complete, please follow the 4 steps described in intercept_suricata_result

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is the reason why you probably do not get valid output.

A question: is nuclei -j -ud /opt/nuclei-api/nuclei-templates -u json? this is what is needed here

Copy link
Contributor Author

@pranjalg1331 pranjalg1331 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is nuclei -j -ud /opt/nuclei-api/nuclei-templates -u json? this is what is needed here

Yes, -j flag is used to give a json output. However the json output is specific to each template, and so the resulting report contains multiple JSON objects in a single string, which we have to reformat in the analyzer. Should I perform this reformatting inside the API itself in the callback function?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

multiple JSON objects in a single string

means something like string = '[{"test":1},{"test":2}]', right?

In this case, you can json.loads(string) and get the list and the assign it to a key in the resulting dictionary.

And yes, it would be better to do that parsing here instead of the analyzer.

And if the format is not like I mentioned, that would mean that it is not valid JSON and so there's something wrong with the library

Copy link
Contributor Author

@pranjalg1331 pranjalg1331 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The format isstring = '{"test":1}\n{"test":2}'. However I can manually put all the json objects from the string inside a list and return them (like I am already doing in the analyzer).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok! can you please write down, as a comment in the code, the reason why — because this parsing is unusual and could confuse future maintainers

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok! can you please write down as a comment the reason why because this is unusual and this could be maintainers
Sorry, I could not understand. Is there a typo?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah few words were missing. I was saying that, considering that that parsing is unusual, I would like you to add a comment close to the code to help future maintainers



# Register the 'nuclei' command as the /run-nuclei endpoint.
# -j  : emit findings as JSON (one object per line, parsed in the callback)
# -ud : directory holding the nuclei templates inside the container
# -u  : target URL/IP, appended from the request's "args" by shell2http
shell2http.register_command(
    endpoint="run-nuclei",
    command_name="nuclei -j -ud /opt/nuclei-api/nuclei-templates -u",
    callback_fn=my_callback_fn,
)


if __name__ == "__main__":
    # development entrypoint; the container runs this behind gunicorn instead
    logger.info("Starting Nuclei scanner API server")
    app.run(host="0.0.0.0", port=4008)
6 changes: 6 additions & 0 deletions integrations/nuclei_analyzer/compose-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Test build for the nuclei analyzer integration.
services:
  nuclei_analyzer:
    build:
      context: ../integrations/nuclei_analyzer
      dockerfile: Dockerfile
    # image must live under the intelowlproject organization, not a
    # personal repository (requested in review)
    image: intelowlproject/nuclei-analyzer:test
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

intelowlproject repo

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

Loading
Loading