Skip to content
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 163 additions & 0 deletions api_app/analyzers_manager/migrations/0148_analyzer_config_nuclei.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
from django.db import migrations
from django.db.models.fields.related_descriptors import (
ForwardManyToOneDescriptor,
ForwardOneToOneDescriptor,
ManyToManyDescriptor,
ReverseManyToOneDescriptor,
ReverseOneToOneDescriptor,
)

# Serialized database rows consumed by migrate()/reverse_migrate() below.

# AnalyzerConfig row for the Nuclei analyzer. The "model" key holds the
# dotted model path and is popped off by migrate() before the remaining
# keys are used as model field values.
plugin = {
    "python_module": {
        "health_check_schedule": None,
        "update_schedule": None,
        "module": "nuclei.NucleiAnalyzer",
        "base_path": "api_app.analyzers_manager.observable_analyzers",
    },
    "name": "Nuclei",
    "description": "[Nuclei](https://github.com/projectdiscovery/nuclei) is a fast, customizable vulnerability scanner that leverages YAML-based templates to detect, rank, and address security flaws. It operates using structured templates that define specific security checks.",
    "disabled": False,
    "soft_time_limit": 1200,
    "routing_key": "default",
    "health_check_status": True,
    "type": "observable",
    "docker_based": True,
    "maximum_tlp": "RED",
    "observable_supported": ["ip", "url"],
    "supported_filetypes": [],
    "run_hash": False,
    "run_hash_type": "",
    "not_supported_filetypes": [],
    "mapping_data_model": {},
    "model": "analyzers_manager.AnalyzerConfig",
}

# Parameter rows exposed by the analyzer (here: the optional template_dirs
# list restricting which template categories Nuclei runs).
params = [
    {
        "python_module": {
            "module": "nuclei.NucleiAnalyzer",
            "base_path": "api_app.analyzers_manager.observable_analyzers",
        },
        "name": "template_dirs",
        "type": "list",
        "description": "The template_dirs parameter allows you to specify a list of directories containing templates, each focusing on a particular category of vulnerabilities, exposures, or security assessments.\r\nAvailable Template Categories:\r\ncloud\r\ncode\r\ncves\r\nvulnerabilities\r\ndns\r\nfile\r\nheadless\r\nhelpers\r\nhttp\r\njavascript\r\nnetwork\r\npassive\r\nprofiles\r\nssl\r\nworkflows\r\nexposures",
        "is_secret": False,
        "required": False,
    }
]

# Default PluginConfig values for the parameters declared above
# (template_dirs defaults to an empty list -> all categories allowed).
values = [
    {
        "parameter": {
            "python_module": {
                "module": "nuclei.NucleiAnalyzer",
                "base_path": "api_app.analyzers_manager.observable_analyzers",
            },
            "name": "template_dirs",
            "type": "list",
            "description": "The template_dirs parameter allows you to specify a list of directories containing templates, each focusing on a particular category of vulnerabilities, exposures, or security assessments.\r\nAvailable Template Categories:\r\ncloud\r\ncode\r\ncves\r\nvulnerabilities\r\ndns\r\nfile\r\nheadless\r\nhelpers\r\nhttp\r\njavascript\r\nnetwork\r\npassive\r\nprofiles\r\nssl\r\nworkflows\r\nexposures",
            "is_secret": False,
            "required": False,
        },
        "analyzer_config": "Nuclei",
        "connector_config": None,
        "visualizer_config": None,
        "ingestor_config": None,
        "pivot_config": None,
        "for_organization": False,
        "value": [],
        "updated_at": "2025-01-08T08:33:45.653741Z",
        "owner": None,
    }
]


def _get_real_obj(Model, field, value):
    """
    Resolve a serialized field value into real related model object(s).

    For forward/reverse FK and one-to-one descriptors the single value is
    resolved; for many-to-many descriptors every element of the list is
    resolved. Non-relational fields (or falsy values) pass through unchanged.
    """

    def _resolve(model, related_model, raw):
        # A nested dict describes the related row field by field: resolve
        # each entry recursively, then get-or-create the row itself.
        if isinstance(raw, dict):
            resolved = {
                key: _get_real_obj(related_model, key, val)
                for key, val in raw.items()
            }
            return related_model.objects.get_or_create(**resolved)[0]
        # An int is a serialized primary key — except for PluginConfig,
        # whose related row is looked up via the plugin name instead.
        if isinstance(raw, int):
            if model.__name__ == "PluginConfig":
                return related_model.objects.get(name=plugin["name"])
            return related_model.objects.get(pk=raw)
        # Anything else is treated as the natural key (the object's name).
        return related_model.objects.get(name=raw)

    descriptor = getattr(Model, field)
    single_relation_descriptors = (
        ForwardManyToOneDescriptor,
        ReverseManyToOneDescriptor,
        ReverseOneToOneDescriptor,
        ForwardOneToOneDescriptor,
    )
    if value:
        if type(descriptor) in single_relation_descriptors:
            related_model = descriptor.get_queryset().model
            return _resolve(Model, related_model, value)
        if type(descriptor) in [ManyToManyDescriptor]:
            related_model = descriptor.rel.model
            return [_resolve(Model, related_model, item) for item in value]
    return value


def _create_object(Model, data):
    """
    Create the object described by ``data`` unless an identical one exists.

    Returns True when a matching object was already present, False when a
    new one was created — migrate() uses this flag to decide whether the
    related parameters/values still need to be created.
    """
    # Split fields into many-to-many values (set after save) and regular ones.
    mtm, no_mtm = {}, {}
    for field, value in data.items():
        value = _get_real_obj(Model, field, value)
        if type(getattr(Model, field)) is ManyToManyDescriptor:
            mtm[field] = value
        else:
            no_mtm[field] = value
    try:
        o = Model.objects.get(**no_mtm)
    except Model.DoesNotExist:
        o = Model(**no_mtm)
        o.full_clean()
        o.save()
        # M2M relations can only be assigned once the instance has a PK.
        for field, value in mtm.items():
            attribute = getattr(o, field)
            if value is not None:
                attribute.set(value)
        return False
    return True


def migrate(apps, schema_editor):
    """Forward data migration: create the Nuclei AnalyzerConfig plus its parameters and default values."""
    Parameter = apps.get_model("api_app", "Parameter")
    PluginConfig = apps.get_model("api_app", "PluginConfig")
    # NOTE: pop() mutates the module-level dict on purpose — "model" must be
    # removed before the remaining keys are passed to _create_object as
    # model field values.
    python_path = plugin.pop("model")
    Model = apps.get_model(*python_path.split("."))
    if not Model.objects.filter(name=plugin["name"]).exists():
        exists = _create_object(Model, plugin)
        # _create_object returns False when it had to create the object,
        # i.e. the parameters/values do not exist yet either.
        if not exists:
            for param in params:
                _create_object(Parameter, param)
            for value in values:
                _create_object(PluginConfig, value)


def reverse_migrate(apps, schema_editor):
    """
    Reverse data migration: delete the Nuclei AnalyzerConfig.

    Uses pop() with a default because migrate() may already have popped the
    "model" key off the shared module-level ``plugin`` dict when both
    directions run in the same Python process — a plain pop() would raise
    KeyError in that case.
    """
    python_path = plugin.pop("model", "analyzers_manager.AnalyzerConfig")
    Model = apps.get_model(*python_path.split("."))
    Model.objects.get(name=plugin["name"]).delete()


class Migration(migrations.Migration):
    # NOTE(review): presumably disabled so the RunPython data operations are
    # not wrapped in a single transaction — confirm against the project's
    # other data migrations.
    atomic = False

    dependencies = [
        ("api_app", "0065_job_mpnodesearch"),
        (
            "analyzers_manager",
            "0147_alter_analyzer_config_feodo_yaraify_urlhaus_yaraify_scan",
        ),
    ]

    operations = [migrations.RunPython(migrate, reverse_migrate)]
67 changes: 67 additions & 0 deletions api_app/analyzers_manager/observable_analyzers/nuclei.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
# See the file 'LICENSE' for copying permission.

import json
import logging

from api_app.analyzers_manager.classes import DockerBasedAnalyzer, ObservableAnalyzer


class NucleiAnalyzer(ObservableAnalyzer, DockerBasedAnalyzer):
    """Observable analyzer that runs a Nuclei scan through the dedicated docker container."""

    # Endpoint exposed by the nuclei_analyzer container
    # (see integrations/nuclei_analyzer/app.py).
    url: str = "http://nuclei_analyzer:4008/run-nuclei"
    # Template category directories to scan with; filled from the
    # "template_dirs" parameter of the AnalyzerConfig.
    template_dirs: list
    # Polling configuration for the docker-based request:
    # up to max_tries polls, poll_distance seconds apart.
    max_tries: int = 40
    poll_distance: int = 30

    @classmethod
    def update(cls) -> bool:
        # Template download/update happens inside the container
        # (entrypoint.sh); nothing to update on the Django side.
        pass

def run(self):
    """
    Prepare and execute a Nuclei scan through the Docker-based API.

    Returns a JSON string when the container answers with a dict, otherwise
    a list of JSON objects parsed from Nuclei's output: the ``-j`` flag
    emits newline-delimited JSON (one object per matched template), so the
    raw report is not a single valid JSON document.
    """
    # Template categories shipped with the official nuclei-templates repo;
    # anything else is rejected so arbitrary filesystem paths never reach
    # the CLI invocation inside the container.
    VALID_TEMPLATE_CATEGORIES = {
        "cloud",
        "code",
        "cves",
        "vulnerabilities",
        "dns",
        "file",
        "headless",
        "helpers",
        "http",
        "javascript",
        "network",
        "passive",
        "profiles",
        "ssl",
        "workflows",
        "exposures",
    }

    logger = logging.getLogger(__name__)

    args = [self.observable_name]

    # Append valid template directories with the "-t" flag; invalid entries
    # are skipped, logged and surfaced to the user via the job report.
    for template_dir in self.template_dirs:
        if template_dir in VALID_TEMPLATE_CATEGORIES:
            args.extend(["-t", template_dir])
        else:
            warning = (
                f"Skipping invalid template directory {template_dir!r} "
                f"while scanning observable {self.observable_name}"
            )
            logger.warning(warning)
            self.report.errors.append(warning)

    req_data = {"args": args}

    # Execute the request against the nuclei_analyzer container.
    response = self._docker_run(req_data=req_data, req_files=None)
    logger.debug("Nuclei raw response for %s: %s", self.observable_name, response)

    if isinstance(response, dict):
        return json.dumps(response)

    # NDJSON output: parse each line independently, skipping any
    # non-JSON noise the scanner may have printed.
    json_objects = []
    for line in response.strip().split("\n"):
        try:
            json_objects.append(json.loads(line))
        except json.JSONDecodeError:
            logger.warning(
                "Skipping non-JSON line for %s: %s", self.observable_name, line
            )
    return json_objects
28 changes: 28 additions & 0 deletions integrations/nuclei_analyzer/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Use the official Nuclei image as the base
FROM projectdiscovery/nuclei:v3.3.8

# Write logs under the shared volume mounted by compose.yml
# (generic_logs:/var/log/intel_owl), like the other docker analyzers.
ENV LOG_PATH=/var/log/intel_owl/nuclei_analyzer
ENV USER=nuclei-user

# Install required packages using apk (Alpine Package Keeper)
RUN apk add --no-cache python3 py3-pip

# Create a working directory
WORKDIR /app

# Copy the requirements file and install dependencies
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# Copy the Flask API code
COPY app.py .

# Expose the API port (gunicorn binds 0.0.0.0:4008 in entrypoint.sh;
# the previous value 5000 did not match the actual listener)
EXPOSE 4008

COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

# Template download/update happens at container start (entrypoint.sh) so the
# image does not bake in a stale templates snapshot.

ENTRYPOINT ["/entrypoint.sh"]
70 changes: 70 additions & 0 deletions integrations/nuclei_analyzer/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import logging
import os

from flask import Flask
from flask_executor import Executor
from flask_shell2http import Shell2HTTP

# Logger configuration
LOG_NAME = "nuclei_scanner"
logger = logging.getLogger("flask_shell2http")

# Create formatter
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

# Set log level from environment variable or default to INFO
log_level = os.getenv("LOG_LEVEL", logging.INFO)
# Default log directory lives under /var/log/intel_owl so files end up in
# the shared "generic_logs" volume mounted by compose.yml, matching the
# layout used by the other docker analyzers (e.g. pcap_analyzers).
log_path = os.getenv("LOG_PATH", f"/var/log/intel_owl/{LOG_NAME}")

# Ensure log directory exists
os.makedirs(log_path, exist_ok=True)

# Create file handlers for both general logs and errors
fh = logging.FileHandler(f"{log_path}/{LOG_NAME}.log")
fh.setFormatter(formatter)
fh.setLevel(log_level)

fh_err = logging.FileHandler(f"{log_path}/{LOG_NAME}_errors.log")
fh_err.setFormatter(formatter)
fh_err.setLevel(logging.ERROR)

# Add handlers to logger
logger.addHandler(fh)
logger.addHandler(fh_err)
logger.setLevel(log_level)

# Flask application instance with secret key
app = Flask(__name__)
app.config["SECRET_KEY"] = os.getenv("SECRET_KEY", os.urandom(24).hex())

# Initialize the Executor for background task processing
executor = Executor(app)

# Initialize the Shell2HTTP for exposing shell commands as HTTP endpoints
shell2http = Shell2HTTP(app=app, executor=executor)


def my_callback_fn(context, future):
    """
    Callback that post-processes Nuclei scan results.

    Nuclei's ``-j`` flag emits newline-delimited JSON (one object per
    matched template), so the raw report is NOT a single valid JSON
    document and must be parsed line by line before being returned to the
    analyzer. This unusual parsing is deliberate — do not replace it with
    a single ``json.loads`` call.
    """
    import json  # local import: only needed for report post-processing

    try:
        result = future.result()
        logger.info(f"Nuclei scan completed for context: {context}")
        report = result.get("report") if isinstance(result, dict) else None
        if isinstance(report, str) and report.strip():
            json_objects = []
            for line in report.strip().splitlines():
                try:
                    json_objects.append(json.loads(line))
                except json.JSONDecodeError:
                    logger.warning(f"Skipping non-JSON line: {line}")
            result["report"] = json_objects
            # NOTE(review): flask_shell2http serves the stored future result;
            # overwrite the (private) _result so callers receive the parsed
            # report — confirm against intercept_suricata_result in
            # pcap_analyzers/check_pcap.py.
            future._result = result
        logger.debug(f"Scan result: {result}")
    except Exception as e:
        logger.error(f"Error in callback function: {str(e)}", exc_info=True)
        raise
Comment on lines +51 to +69
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is not complete, please follow the 4 steps described in intercept_suricata_result

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is the reason why you probably do not get valid output.

A question: is nuclei -j -ud /opt/nuclei-api/nuclei-templates -u json? this is what is needed here

Copy link
Contributor Author

@pranjalg1331 pranjalg1331 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is nuclei -j -ud /opt/nuclei-api/nuclei-templates -u json? this is what is needed here

Yes, -j flag is used to give a json output. However the json output is specific to each template, and so the resulting report contains multiple JSON objects in a single string, which we have to reformat in the analyzer. Should I perform this reformatting inside the API itself in the callback function?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

multiple JSON objects in a single string

means something like string = '[{"test":1},{"test":2}]', right?

In this case, you can json.loads(string) and get the list and the assign it to a key in the resulting dictionary.

And yes, it would be better to do that parsing here instead of the analyzer.

And if the format is not like I mentioned, that would mean that it is not valid JSON and so there's something wrong with the library

Copy link
Contributor Author

@pranjalg1331 pranjalg1331 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The format isstring = '{"test":1}\n{"test":2}'. However I can manually put all the json objects from the string inside a list and return them (like I am already doing in the analyzer).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok! can you please write down as a comment the reason why because this is unusual and this could be maintainers

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok! can you please write down as a comment the reason why because this is unusual and this could be maintainers
Sorry, I could not understand. Is there a typo?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah few words were missing. I was saying that, considering that that parsing is unusual, I would like you to add a comment close to the code to help future maintainers



# Register the 'nuclei' command: POST /run-nuclei with {"args": [...]}
# appends the caller-supplied args (target plus optional "-t" category
# flags) to this base command inside the container.
shell2http.register_command(
    endpoint="run-nuclei",
    command_name="nuclei -j -ud /opt/nuclei-api/nuclei-templates -u",
    callback_fn=my_callback_fn,
)


# Development entrypoint only: in the container the app is served by
# gunicorn (see entrypoint.sh), which never executes this branch.
if __name__ == "__main__":
    logger.info("Starting Nuclei scanner API server")
    app.run(host="0.0.0.0", port=4008)
6 changes: 6 additions & 0 deletions integrations/nuclei_analyzer/compose-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Test/build override: builds the nuclei_analyzer image locally instead of
# pulling it from the registry.
services:
  nuclei_analyzer:
    build:
      # NOTE(review): path is resolved relative to the directory compose is
      # invoked from (expected: docker/) — verify against the sibling
      # integrations' compose-tests files.
      context: ../integrations/nuclei_analyzer
      dockerfile: Dockerfile
    image: pranjalg1310/nuclei-analyzer:3.0.4
15 changes: 15 additions & 0 deletions integrations/nuclei_analyzer/compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# All additional integrations should be added following this format only.

services:
  nuclei_analyzer:
    image: pranjalg1310/nuclei-analyzer:3.0.4
    container_name: nuclei_analyzer
    restart: unless-stopped
    # Reachable only on the internal compose network; the analyzer calls
    # http://nuclei_analyzer:4008 — the port is not published on the host.
    expose:
      - "4008"
    env_file:
      - env_file_integrations
    volumes:
      # Shared log volume: app.py writes its log files under /var/log/intel_owl.
      - generic_logs:/var/log/intel_owl
    depends_on:
      - uwsgi
16 changes: 16 additions & 0 deletions integrations/nuclei_analyzer/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/sh
# Entrypoint for the nuclei_analyzer container: download/refresh the Nuclei
# templates with a bounded number of retries, then start the Flask API.

TEMPLATES_DIR="/opt/nuclei-api/nuclei-templates"
MAX_RETRIES=5

echo "Updating Nuclei templates..."
nuclei -update-template-dir "$TEMPLATES_DIR" -update-templates

echo "Verifying Nuclei templates..."
retries=0
# Bounded retry loop instead of looping forever: if the template download
# keeps failing, exit with an error so the orchestrator can surface it.
while [ ! -d "$TEMPLATES_DIR" ] || [ -z "$(ls -A "$TEMPLATES_DIR")" ]; do
    retries=$((retries + 1))
    if [ "$retries" -gt "$MAX_RETRIES" ]; then
        echo "ERROR: Nuclei templates could not be downloaded after $MAX_RETRIES attempts, exiting." >&2
        exit 1
    fi
    echo "Templates not found or empty, retrying update in 10 seconds... (attempt $retries/$MAX_RETRIES)"
    sleep 10
    nuclei -update-template-dir "$TEMPLATES_DIR" -update-templates
done

echo "Templates downloaded successfully. Starting Flask API..."
exec gunicorn -b 0.0.0.0:4008 --timeout 120 --access-logfile - "app:app"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use all the options we used in the other entrypoint.sh you can found

Copy link
Contributor Author

@pranjalg1331 pranjalg1331 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Included gunicorn logging same as Suricata.

2 changes: 2 additions & 0 deletions integrations/nuclei_analyzer/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Flask-Shell2HTTP-fork==1.9.2
gunicorn==23.0.0
6 changes: 5 additions & 1 deletion start
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ check_parameters "$@" && shift 2
load_env "docker/.env"
current_version=${REACT_APP_INTELOWL_VERSION/"v"/""}

docker_analyzers=("pcap_analyzers" "tor_analyzers" "malware_tools_analyzers" "cyberchef" "phoneinfoga" "phishing_analyzers")
docker_analyzers=("pcap_analyzers" "tor_analyzers" "malware_tools_analyzers" "cyberchef" "phoneinfoga" "phishing_analyzers" "nuclei_analyzer")


for value in "${docker_analyzers[@]}"; do
Expand Down Expand Up @@ -147,6 +147,10 @@ while [[ $# -gt 0 ]]; do
analyzers["tor_analyzers"]=true
shift 1
;;
--nuclei_analyzer)
analyzers["nuclei_analyzer"]=true
shift 1
;;
--malware_tools_analyzers)
analyzers["malware_tools_analyzers"]=true
shift 1
Expand Down
Loading