Skip to content
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,18 @@ updates:
- dependency-name: "*"
update-types: [ "version-update:semver-patch" ]

- package-ecosystem: "pip"
directory: "/integrations/nuclei_analyzer"
schedule:
interval: "weekly"
day: "tuesday"
target-branch: "develop"
ignore:
# ignore all patch updates since we are using ~=
# this does not work for security updates
- dependency-name: "*"
update-types: [ "version-update:semver-patch" ]

- package-ecosystem: "pip"
directory: "/integrations/phishing_analyzers"
schedule:
Expand Down Expand Up @@ -119,6 +131,18 @@ updates:
- dependency-name: "*"
update-types: ["version-update:semver-patch"]

- package-ecosystem: "docker"
directory: "/integrations/nuclei_analyzer"
schedule:
interval: "weekly"
day: "tuesday"
target-branch: "develop"
ignore:
# ignore all patch updates to reduce update noise
# this does not apply to security updates, which are always raised
- dependency-name: "*"
update-types: ["version-update:semver-patch"]

- package-ecosystem: "docker"
directory: "/integrations/malware_tools_analyzers"
schedule:
Expand Down
163 changes: 163 additions & 0 deletions api_app/analyzers_manager/migrations/0148_analyzer_config_nuclei.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
from django.db import migrations
from django.db.models.fields.related_descriptors import (
ForwardManyToOneDescriptor,
ForwardOneToOneDescriptor,
ManyToManyDescriptor,
ReverseManyToOneDescriptor,
ReverseOneToOneDescriptor,
)

# Serialized AnalyzerConfig for the Nuclei analyzer; the trailing "model" key
# is migration metadata (dotted app.Model path), not a model field.
plugin = {
    "python_module": {
        "health_check_schedule": None,
        "update_schedule": None,
        "module": "nuclei.NucleiAnalyzer",
        "base_path": "api_app.analyzers_manager.observable_analyzers",
    },
    "name": "Nuclei",
    "description": "[Nuclei](https://github.com/projectdiscovery/nuclei) is a fast, customizable vulnerability scanner that leverages YAML-based templates to detect, rank, and address security flaws. It operates using structured templates that define specific security checks.",
    "disabled": False,
    "soft_time_limit": 1200,
    "routing_key": "default",
    "health_check_status": True,
    "type": "observable",
    "docker_based": True,
    "maximum_tlp": "RED",
    "observable_supported": ["ip", "url"],
    "supported_filetypes": [],
    "run_hash": False,
    "run_hash_type": "",
    "not_supported_filetypes": [],
    "mapping_data_model": {},
    "model": "analyzers_manager.AnalyzerConfig",
}

# Parameter definitions exposed by the analyzer (seeded only on first creation).
params = [
    {
        "python_module": {
            "module": "nuclei.NucleiAnalyzer",
            "base_path": "api_app.analyzers_manager.observable_analyzers",
        },
        "name": "template_dirs",
        "type": "list",
        "description": "The template_dirs parameter allows you to specify a list of directories containing templates, each focusing on a particular category of vulnerabilities, exposures, or security assessments.\r\nAvailable Template Categories:\r\ncloud\r\ncode\r\ncves\r\nvulnerabilities\r\ndns\r\nfile\r\nheadless\r\nhelpers\r\nhttp\r\njavascript\r\nnetwork\r\npassive\r\nprofiles\r\nssl\r\nworkflows\r\nexposures",
        "is_secret": False,
        "required": False,
    }
]

# Default PluginConfig values for the parameters above (empty list = no
# template filtering by default).
values = [
    {
        "parameter": {
            "python_module": {
                "module": "nuclei.NucleiAnalyzer",
                "base_path": "api_app.analyzers_manager.observable_analyzers",
            },
            "name": "template_dirs",
            "type": "list",
            "description": "The template_dirs parameter allows you to specify a list of directories containing templates, each focusing on a particular category of vulnerabilities, exposures, or security assessments.\r\nAvailable Template Categories:\r\ncloud\r\ncode\r\ncves\r\nvulnerabilities\r\ndns\r\nfile\r\nheadless\r\nhelpers\r\nhttp\r\njavascript\r\nnetwork\r\npassive\r\nprofiles\r\nssl\r\nworkflows\r\nexposures",
            "is_secret": False,
            "required": False,
        },
        "analyzer_config": "Nuclei",
        "connector_config": None,
        "visualizer_config": None,
        "ingestor_config": None,
        "pivot_config": None,
        "for_organization": False,
        "value": [],
        "updated_at": "2025-01-08T08:33:45.653741Z",
        "owner": None,
    }
]


def _get_real_obj(Model, field, value):
    """
    Resolve a serialized relation value into a real related model instance.

    ``value`` may be a dict of lookup fields (resolved recursively, then
    get_or_create'd), an int primary key, or a natural-key name string.
    Values for non-relational fields are returned unchanged.
    """

    def _get_obj(Model, other_model, value):
        if isinstance(value, dict):
            # nested serialized object: resolve each field, then get_or_create
            real_vals = {}
            for key, real_val in value.items():
                real_vals[key] = _get_real_obj(other_model, key, real_val)
            value = other_model.objects.get_or_create(**real_vals)[0]
        # it is just the primary key serialized
        else:
            if isinstance(value, int):
                if Model.__name__ == "PluginConfig":
                    # PluginConfig rows reference their plugin by name, not pk
                    value = other_model.objects.get(name=plugin["name"])
                else:
                    value = other_model.objects.get(pk=value)
            else:
                # natural key: look the related object up by its unique name
                value = other_model.objects.get(name=value)
        return value

    if (
        type(getattr(Model, field))
        in [
            ForwardManyToOneDescriptor,
            ReverseManyToOneDescriptor,
            ReverseOneToOneDescriptor,
            ForwardOneToOneDescriptor,
        ]
        and value
    ):
        # single-valued relation: resolve the one serialized reference
        other_model = getattr(Model, field).get_queryset().model
        value = _get_obj(Model, other_model, value)
    elif type(getattr(Model, field)) in [ManyToManyDescriptor] and value:
        # many-to-many relation: resolve each serialized reference in the list
        other_model = getattr(Model, field).rel.model
        value = [_get_obj(Model, other_model, val) for val in value]
    return value


def _create_object(Model, data):
    """
    Idempotently create a ``Model`` row from serialized ``data``.

    Returns ``True`` if a matching object already existed and ``False`` if a
    new one was created — note the inverted-looking convention: callers use
    the return value as an "already exists" flag.
    """
    mtm, no_mtm = {}, {}
    for field, value in data.items():
        value = _get_real_obj(Model, field, value)
        # m2m values can only be assigned after the instance has a pk, so
        # split them out from the regular fields
        if type(getattr(Model, field)) is ManyToManyDescriptor:
            mtm[field] = value
        else:
            no_mtm[field] = value
    try:
        o = Model.objects.get(**no_mtm)
    except Model.DoesNotExist:
        o = Model(**no_mtm)
        o.full_clean()
        o.save()
        for field, value in mtm.items():
            attribute = getattr(o, field)
            if value is not None:
                attribute.set(value)
        return False
    return True


def migrate(apps, schema_editor):
    """
    Forward migration: create the Nuclei AnalyzerConfig together with its
    Parameter definitions and default PluginConfig values, if missing.
    """
    Parameter = apps.get_model("api_app", "Parameter")
    PluginConfig = apps.get_model("api_app", "PluginConfig")
    # "model" is migration metadata, not a model field: pop it so the
    # remaining dict maps 1:1 onto AnalyzerConfig fields in _create_object
    python_path = plugin.pop("model")
    Model = apps.get_model(*python_path.split("."))
    if not Model.objects.filter(name=plugin["name"]).exists():
        exists = _create_object(Model, plugin)
        # only seed parameters/values when the config was just created
        if not exists:
            for param in params:
                _create_object(Parameter, param)
            for value in values:
                _create_object(PluginConfig, value)


def reverse_migrate(apps, schema_editor):
    """
    Reverse migration: delete the Nuclei AnalyzerConfig created by
    :func:`migrate` (related Parameter/PluginConfig rows cascade in Django).
    """
    # Read "model" non-destructively: migrate() pops the key from the shared
    # module-level dict, so a second pop() here would raise KeyError if both
    # directions ran in the same process. Fall back to the known path.
    python_path = plugin.get("model", "analyzers_manager.AnalyzerConfig")
    Model = apps.get_model(*python_path.split("."))
    # filter().delete() is a no-op when the row is already gone, making the
    # reverse migration idempotent instead of raising Model.DoesNotExist.
    Model.objects.filter(name=plugin["name"]).delete()


class Migration(migrations.Migration):
    # data migration touching several tables; run outside a single transaction
    atomic = False
    dependencies = [
        ("api_app", "0065_job_mpnodesearch"),
        (
            "analyzers_manager",
            "0147_alter_analyzer_config_feodo_yaraify_urlhaus_yaraify_scan",
        ),
    ]

    operations = [migrations.RunPython(migrate, reverse_migrate)]
56 changes: 56 additions & 0 deletions api_app/analyzers_manager/observable_analyzers/nuclei.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
# See the file 'LICENSE' for copying permission.
import logging

from api_app.analyzers_manager.classes import DockerBasedAnalyzer, ObservableAnalyzer


class NucleiAnalyzer(ObservableAnalyzer, DockerBasedAnalyzer):
    """
    Run a Nuclei vulnerability scan against an IP or URL observable through
    the ``nuclei_analyzer`` docker container's HTTP API.
    """

    # endpoint exposed by integrations/nuclei_analyzer/app.py
    url: str = "http://nuclei_analyzer:4008/run-nuclei"
    # user-configurable list of template categories to scan with
    template_dirs: list
    # polling configuration for the docker-based job (40 tries * 30s)
    max_tries: int = 40
    poll_distance: int = 30

    @classmethod
    def update(cls) -> bool:
        # template updates are handled inside the docker image; nothing to do
        pass

    def run(self):
        """
        Prepares and executes a Nuclei scan through the Docker-based API.

        Invalid entries in ``template_dirs`` are skipped: a warning is logged
        and recorded in the report errors instead of silently printed.
        """
        # template categories shipped with nuclei; anything else is rejected
        valid_template_categories = frozenset(
            {
                "cloud",
                "code",
                "cves",
                "vulnerabilities",
                "dns",
                "file",
                "headless",
                "helpers",
                "http",
                "javascript",
                "network",
                "passive",
                "profiles",
                "ssl",
                "workflows",
                "exposures",
            }
        )

        args = [self.observable_name]

        # Append valid template directories with the "-t" flag
        for template_dir in self.template_dirs:
            if template_dir in valid_template_categories:
                args.extend(["-t", template_dir])
            else:
                warning = (
                    f"Skipping invalid template directory '{template_dir}' "
                    f"while analyzing observable {self.observable_name}"
                )
                logging.getLogger(__name__).warning(warning)
                # surface the skipped entry to the user in the job report
                self.report.errors.append(warning)

        req_data = {"args": args}

        # Execute the request against the docker container
        response = self._docker_run(req_data=req_data, req_files=None)

        # app.py repackages the newline-delimited nuclei findings under "data"
        return response.get("data", [])
41 changes: 41 additions & 0 deletions integrations/nuclei_analyzer/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
FROM projectdiscovery/nuclei:v3.3.8

ENV LOG_PATH=/var/log/intel_owl/nuclei_analyzer
ENV USER=nuclei-user
ENV PROJECT_PATH=/app

# Create non-root user
RUN adduser -D -h /home/${USER} ${USER}

# Install required packages using apk and clean cache in the same layer
RUN apk add --no-cache python3 py3-pip \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please select a specific python version here. The more recent the better.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated

&& rm -rf /var/cache/apk/* \
&& pip3 install --no-cache-dir --upgrade pip

# Create working directory and set ownership
WORKDIR /app

# Copy and install requirements first (better layer caching)
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt \
&& rm -rf ~/.cache/pip/*

# Create log directory with proper permissions
RUN mkdir -p ${LOG_PATH} \
&& touch ${LOG_PATH}/gunicorn_access.log ${LOG_PATH}/gunicorn_errors.log \
&& chown -R ${USER}:${USER} ${LOG_PATH} \
&& chmod 755 ${LOG_PATH} \
&& chmod 666 ${LOG_PATH}/gunicorn_access.log \
&& chmod 666 ${LOG_PATH}/gunicorn_errors.log
# Copy application files
COPY app.py .
COPY entrypoint.sh /entrypoint.sh

# Set proper permissions
RUN chown -R ${USER}:${USER} /app \
&& chmod +x /entrypoint.sh

# Expose the API port
EXPOSE 4008

ENTRYPOINT ["/entrypoint.sh"]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you please add an HEALTHCHECK here as an additional check?

You can check the Dockerfile_nginx as an example. You can check whether the gunicorn app is up or not after a predefined period that should be the expected time of execution of the entrypoint.sh script

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the health check, I have added an extra endpoint to the API which is being called from the dockerfile.

77 changes: 77 additions & 0 deletions integrations/nuclei_analyzer/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import json
import logging
import os

from flask import Flask
from flask_executor import Executor
from flask_shell2http import Shell2HTTP

# Logger configuration: attach to flask_shell2http's logger so command
# execution logs from the library end up in our files too
LOG_NAME = "nuclei_scanner"
logger = logging.getLogger("flask_shell2http")

# Create formatter
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

# Set log level from environment variable or default to INFO
log_level = os.getenv("LOG_LEVEL", logging.INFO)
# NOTE(review): the container sets LOG_PATH=/var/log/intel_owl/nuclei_analyzer;
# the fallback below uses LOG_NAME ("nuclei_scanner") instead — confirm intended
log_path = os.getenv("LOG_PATH", f"/var/log/intel_owl/{LOG_NAME}")

# Create file handlers for both general logs and errors
fh = logging.FileHandler(f"{log_path}/{LOG_NAME}.log")
fh.setFormatter(formatter)
fh.setLevel(log_level)

# errors-only handler so failures are easy to find in a dedicated file
fh_err = logging.FileHandler(f"{log_path}/{LOG_NAME}_errors.log")
fh_err.setFormatter(formatter)
fh_err.setLevel(logging.ERROR)

# Add handlers to logger
logger.addHandler(fh)
logger.addHandler(fh_err)
logger.setLevel(log_level)

# Flask application instance with secret key
# NOTE: the os.urandom fallback generates a new key on every restart,
# invalidating existing sessions; set SECRET_KEY in production
app = Flask(__name__)
app.config["SECRET_KEY"] = os.getenv("SECRET_KEY", os.urandom(24).hex())

# Initialize the Executor for background task processing
executor = Executor(app)

# Initialize the Shell2HTTP for exposing shell commands as HTTP endpoints
shell2http = Shell2HTTP(app=app, executor=executor)


def my_callback_fn(context, future):
    """
    Callback invoked by flask_shell2http when a Nuclei scan finishes.

    Nuclei's ``-j`` output is NOT one JSON document: it emits one JSON object
    per finding, separated by newlines (NDJSON), possibly interleaved with
    human-readable status lines. We therefore parse the report line by line,
    skip non-JSON lines, and repackage the findings as a list under
    ``result["report"]["data"]`` so the analyzer receives valid JSON.
    """
    log = logging.getLogger("flask_shell2http")
    try:
        result = future.result()
        report = result["report"]
        # The report is a string with multiple JSON objects separated by newlines
        json_objects = []
        for line in report.strip().split("\n"):
            try:
                json_objects.append(json.loads(line))
            except json.JSONDecodeError:
                # status/progress line, not a finding: warn instead of print
                log.warning(f"Skipping non-JSON line: {line}")
        result["report"] = {"data": json_objects}
        log.info(f"Nuclei scan completed for context: {context}")
        log.debug(f"Scan result: {result}")
    except Exception as e:
        log.error(f"Error in callback function: {str(e)}", exc_info=True)
        raise
Comment on lines +51 to +69
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is not complete, please follow the 4 steps described in intercept_suricata_result

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is the reason why you probably do not get valid output.

A question: is nuclei -j -ud /opt/nuclei-api/nuclei-templates -u json? this is what is needed here

Copy link
Contributor Author

@pranjalg1331 pranjalg1331 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is nuclei -j -ud /opt/nuclei-api/nuclei-templates -u json? this is what is needed here

Yes, -j flag is used to give a json output. However the json output is specific to each template, and so the resulting report contains multiple JSON objects in a single string, which we have to reformat in the analyzer. Should I perform this reformatting inside the API itself in the callback function?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

multiple JSON objects in a single string

means something like string = '[{"test":1},{"test":2}]', right?

In this case, you can json.loads(string) and get the list and the assign it to a key in the resulting dictionary.

And yes, it would be better to do that parsing here instead of the analyzer.

And if the format is not like I mentioned, that would mean that it is not valid JSON and so there's something wrong with the library

Copy link
Contributor Author

@pranjalg1331 pranjalg1331 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The format isstring = '{"test":1}\n{"test":2}'. However I can manually put all the json objects from the string inside a list and return them (like I am already doing in the analyzer).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok! can you please write down, as a comment in the code, the reason why — because this parsing is unusual and could confuse future maintainers

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok! can you please write down as a comment the reason why because this is unusual and this could be maintainers
Sorry, I could not understand. Is there a typo?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah few words were missing. I was saying that, considering that that parsing is unusual, I would like you to add a comment close to the code to help future maintainers



# Register the 'nuclei' command as the /run-nuclei endpoint.
# -j  : emit findings as JSON (one object per line, parsed in the callback)
# -ud : directory holding the nuclei templates inside the container
# -u  : target URL/IP, appended from the request's "args" by shell2http
shell2http.register_command(
    endpoint="run-nuclei",
    command_name="nuclei -j -ud /opt/nuclei-api/nuclei-templates -u",
    callback_fn=my_callback_fn,
)


if __name__ == "__main__":
    # development entrypoint; the container runs this behind gunicorn instead
    logger.info("Starting Nuclei scanner API server")
    app.run(host="0.0.0.0", port=4008)
6 changes: 6 additions & 0 deletions integrations/nuclei_analyzer/compose-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Test build for the nuclei analyzer integration.
services:
  nuclei_analyzer:
    build:
      context: ../integrations/nuclei_analyzer
      dockerfile: Dockerfile
    # image must live under the intelowlproject organization, not a
    # personal repository (requested in review)
    image: intelowlproject/nuclei-analyzer:test
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

intelowlproject repo

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

Loading
Loading