diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index bdcbacbb39..9c29653d90 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -10,6 +10,11 @@ assignees: '' **Describe the bug** A clear and concise description of what the bug is. + +**Used Slips command** + +The Slips command that triggered this bug. + **To Reproduce** Steps to reproduce the behavior: 1. Go to branch '...' diff --git a/.gitmodules b/.gitmodules index 973d05aba8..4516149822 100644 --- a/.gitmodules +++ b/.gitmodules @@ -14,3 +14,7 @@ path = feel_project url = https://github.com/stratosphereips/feel_project branch = main + +[submodule "SlipsWeb"] + path = SlipsWeb + url = https://github.com/stratosphereips/SlipsWeb.git diff --git a/CHANGELOG.md b/CHANGELOG.md index cb65a029a6..c6cfc31004 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,14 @@ +1.1.16 (Dec 1st, 2025) +- Fix problem reporting evidence when Slips is monitoring one interface +- Change the usage of the -g option: Slips now requires the name of the interface to monitor when using -g. +- Fix P2P being unable to connect to the redis database when using -m. +- Fix false positive setting evidence on connection to IP outside local network when the IP is multicast. +- Speed up the evidence handler and whitelist by using bloom filters. +- Drop support for the dynamic reloading of whitelists +- Add an alerts visualiser web interface for TAXII servers. +- Handle Slips and iptables failovers when running Slips as an access point on the Raspberry Pi. + + 1.1.15 (Oct 31st, 2025) - Fix FP connection to port 0 for IGMP flows. - Support monitoring two interfaces when Slips is running as an access point. diff --git a/README.md b/README.md index 6ebc606f87..1efb234e26 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@

-Slips v1.1.15 +Slips v1.1.16

diff --git a/SlipsWeb b/SlipsWeb new file mode 160000 index 0000000000..131238cfbd --- /dev/null +++ b/SlipsWeb @@ -0,0 +1 @@ +Subproject commit 131238cfbdc5c9db2b833e1a83c52ee8b3f23462 diff --git a/VERSION b/VERSION index 645377eea8..63b283b23a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.1.15 +1.1.16 diff --git a/config/slips.yaml b/config/slips.yaml index 02adc7f1b4..e714e46c61 100644 --- a/config/slips.yaml +++ b/config/slips.yaml @@ -347,7 +347,7 @@ exporting_alerts: # Configuer all the methods Slips will export data with # Available options are slack or stix - # export_to : [stix] + # export_to : [stix] (And a TAXII server) # export_to : [slack] export_to: [] @@ -373,12 +373,11 @@ exporting_alerts: # For Stix, if Slips should use TLS use_https: false - # TAXII - discovery_path: /services/discovery-a - inbox_path: /services/inbox-a + # TAXII 2 discovery endpoint (relative path or full URL) + discovery_path: /taxii2/ - # Collection on the server you want to push stix data to - collection_name: collection-a + # Collection (ID or title) on the server you want to push STIX data to + collection_name: Alerts # This value is only used when Slips is running non-stop (e.g with -i ) # push_delay is the time to wait before pushing STIX data to server @@ -390,13 +389,7 @@ exporting_alerts: # TAXII server credentials taxii_username: admin - taxii_password: admin - - # URL used to obtain JWT token. set this to '' if you don't want to use it - # is required for JWT based authentication. - # (JWT based authentication is Optional) - # It's usually /management/auth - jwt_auth_path: /management/auth + taxii_password: changeme_before_installing_a_medallion_server ############################# CESNET: diff --git a/config/whitelist.conf b/config/whitelist.conf index f277693fe6..be554887d5 100644 --- a/config/whitelist.conf +++ b/config/whitelist.conf @@ -162,3 +162,4 @@ organization,google,both,alerts organization,apple,both,alerts organization,twitter,both,alerts domain,markmonitor.com,both,alerts +domain,whois.nic.co,both,alerts diff --git a/docker/Dockerfile b/docker/Dockerfile index afb4984050..080102d600 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -29,42 +29,27 @@ ENV NVM_DIR=/root/.nvm # use bash instead of sh SHELL ["/bin/bash", "-c"] +# Switch to Slips installation dir on login. +WORKDIR ${SLIPS_DIR} + +COPY . 
$SLIPS_DIR -RUN apt update && apt install -y --no-install-recommends \ - wget \ - ca-certificates \ - git \ - curl \ - gnupg \ +RUN apt-get update -o Acquire::Retries=5 -o Acquire::https::No-Cache=True \ + && apt-get install -y --no-install-recommends --fix-broken --fix-missing \ + $(cat install/apt_dependencies.txt) \ lsb-release \ software-properties-common \ - build-essential \ - file \ - lsof \ - iptables \ - iproute2 \ - nfdump \ - tshark \ - whois \ - yara \ - net-tools \ vim \ less \ unzip \ - golang \ - python3-certifi \ - python3-dev \ - python3-tzlocal \ - python3-pip \ nano \ tree \ tmux \ - arp-scan \ && echo 'deb http://download.opensuse.org/repositories/security:/zeek/xUbuntu_22.04/ /' | tee /etc/apt/sources.list.d/security:zeek.list \ && curl -fsSL https://download.opensuse.org/repositories/security:zeek/xUbuntu_22.04/Release.key | gpg --dearmor | tee /etc/apt/trusted.gpg.d/security_zeek.gpg > /dev/null \ && apt update \ - && apt install -y --no-install-recommends --fix-missing \ - zeek \ + && apt-get install -y --no-install-recommends --fix-missing \ + zeek-8.0 \ npm \ && ln -s /opt/zeek/bin/zeek /usr/local/bin/bro \ && apt clean \ @@ -72,11 +57,15 @@ RUN apt update && apt install -y --no-install-recommends \ && curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash - \ && export NVM_DIR="$HOME/.nvm" \ && [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh" \ - && nvm install 22 + && nvm install 22 \ + && apt purge -y redis-server redis # we'll be compiling it manually + + # why are we compiling redis instead od just using apt? # to support running slips on the rpi (arm64). the rpi uses jemmalloc by default, which expects a different page size # than the default on x86_64 +WORKDIR / RUN pip3 install --no-cache-dir --upgrade pip \ && curl -O https://download.redis.io/redis-stable.tar.gz \ && tar xzf redis-stable.tar.gz \ @@ -87,11 +76,9 @@ RUN pip3 install --no-cache-dir --upgrade pip \ ENV PATH="$PATH:/redis-stable/src" -# Switch to Slips installation dir on login. -WORKDIR ${SLIPS_DIR} -COPY . $SLIPS_DIR +WORKDIR ${SLIPS_DIR} # Retrieve Iris COPY --from=build /iris/iris ./modules/irisModule @@ -110,4 +97,5 @@ ENV PATH="$PATH:/StratosphereLinuxIPS/p2p4slips/" WORKDIR ${SLIPS_DIR} + CMD /bin/bash diff --git a/docker/light/Dockerfile b/docker/light/Dockerfile index 355f35a0de..a8b8085b4a 100644 --- a/docker/light/Dockerfile +++ b/docker/light/Dockerfile @@ -27,7 +27,7 @@ RUN set -eux; \ | tee /etc/apt/sources.list.d/security:zeek.list \ && curl -fsSL https://download.opensuse.org/repositories/security:zeek/xUbuntu_22.04/Release.key \ | gpg --dearmor | tee /etc/apt/trusted.gpg.d/security_zeek.gpg > /dev/null \ - && apt-get update && apt-get install -y --no-install-recommends --fix-missing zeek \ + && apt-get update && apt-get install -y --no-install-recommends --fix-missing zeek-8.0 \ && ln -s /opt/zeek/bin/zeek /usr/local/bin/bro \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* diff --git a/docker/light/excluded_libs.txt b/docker/light/excluded_libs.txt index a132b1d097..941d020781 100644 --- a/docker/light/excluded_libs.txt +++ b/docker/light/excluded_libs.txt @@ -16,7 +16,6 @@ scikit_learn slackclient matplotlib stix2 -cabby pandas setuptools numpy diff --git a/docs/exporting.md b/docs/exporting.md index 18079ef070..e93d515c94 100644 --- a/docs/exporting.md +++ b/docs/exporting.md @@ -40,46 +40,44 @@ You can do this by going to the channel, then clicking on the channel's name. 
Th ## STIX -If you want to export alerts to your TAXII server using STIX format, change ```export_to``` variable to export to STIX, and Slips will automatically generate a -```STIX_data.json``` containing all alerts it detects. +If you want to export alerts to your TAXII 2 server using STIX 2.1 format, +set ```export_to``` to ```stix``` and Slips will automatically generate a +```STIX_data.json``` bundle containing the indicators it detects and push it to +your collection. [ExportingAlerts] export_to = [stix] -You can add your TAXII server details in the following variables: +Configure the TAXII client by editing the following variables: -```TAXII_server```: link to your TAXII server +```TAXII_server```: host name or IP address of the TAXII server. -```port```: port to be used +```port```: TCP port (optional, defaults to 80/443). -```use_https```: use https or not. +```use_https```: set to true to connect over HTTPS (note that Medallion, the default TAXII server in SlipsWeb, does not support HTTPS yet) -```discovery_path``` and ```inbox_path``` should contain URIs not full urls. For example: +```discovery_path```: TAXII discovery endpoint path or full URL + (for example ```/taxii2/```). -```python -discovery_path = /services/discovery-a -inbox_path = /services/inbox-a -``` - -```collection_name```: the collection on the server you want to push your STIX data to. - -```push_delay```: the time to wait before pushing STIX data to server (in seconds). -It is used when slips is running non-stop (e.g with -i ) +```collection_name```: ID or title of the TAXII collection that should receive your indicators. By default `Alerts`. -```taxii_username```: TAXII server user credentials +```push_delay```: time between automatic pushes (in seconds) when Slips is +running continuously. -```taxii_password```: TAXII server user password +```taxii_username``` / ```taxii_password```: credentials used for HTTP Basic authentication. -```jwt_auth_path```: auth path if JWT based authentication is used. It's usually /management/auth. this is what we -use to get a token. +**Change the default password of the TAXII server you are going to export to in ```config/medallion_config.yaml```** -if your TAXII server is a remote server, you can set the ```port``` to 443 or 80. +Slips stores the generated bundle for each run in the output directory of that +execution (for example `output//STIX_data.json`), so you can inspect the +exact STIX objects that were pushed. -If running on a file, Slips will export to server after analysis is done. -If running on an interface, Slips will export to server every push_delay seconds. by default it's 1h. +If running on a file, Slips will export once before shutdown. +If running on an interface, Slips will export to the server every +```push_delay``` seconds (default 1 hour).
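
As a rough illustration of the export flow described above (a minimal sketch, not the exact code in `modules/exporting_alerts/stix_exporter.py`; the server URL, API root, collection ID and credentials below are placeholders), pushing a single indicator to a TAXII 2.1 collection with `stix2` and `taxii2-client` could look like this:

```python
# Build a STIX 2.1 bundle with one indicator and push it to a TAXII 2.1
# collection. All connection details below are placeholders.
from stix2 import Bundle, Indicator
from taxii2client.v21 import Collection

indicator = Indicator(
    name="Slips alert: horizontal port scan from 10.0.2.15",
    pattern="[ipv4-addr:value = '10.0.2.15']",
    pattern_type="stix",
)
bundle = Bundle(objects=[indicator])

collection = Collection(
    "http://<taxii-server>/<api-root>/collections/<collection-id>/",
    user="admin",
    password="<taxii-password>",
)
# add_objects accepts the serialized bundle JSON
collection.add_objects(bundle.serialize())
```

Since the bundle written to `STIX_data.json` in the output directory is the same data Slips pushes, you can compare it against what the server stores.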
## JSON format diff --git a/docs/images/immune/a8/failovers_script_output.jpg b/docs/images/immune/a8/failovers_script_output.jpg new file mode 100644 index 0000000000..7e0addb650 Binary files /dev/null and b/docs/images/immune/a8/failovers_script_output.jpg differ diff --git a/docs/images/slips.gif b/docs/images/slips.gif index e5897a3eba..41aa2b9996 100644 Binary files a/docs/images/slips.gif and b/docs/images/slips.gif differ diff --git a/docs/immune/Immune.md b/docs/immune/Immune.md index f27a942570..b54692a6c9 100644 --- a/docs/immune/Immune.md +++ b/docs/immune/Immune.md @@ -10,7 +10,9 @@ This is the main guide to the documentation related to the changes done to Slips - [LLM Research and Selection](https://stratospherelinuxips.readthedocs.io/en/develop/immune/research_and_selection_of_llm_candidates.html) - [LLM RPI Performance](https://stratospherelinuxips.readthedocs.io/en/develop/immune/research_rpi_llm_performance.html) - [LLM RPI Finetuning Frameworks](https://stratospherelinuxips.readthedocs.io/en/develop/immune/finetuning_frameworks_rpi_5.html) +- [LLM Summarization Dataset](https://stratospherelinuxips.readthedocs.io/en/develop/immune/summary_dataset.html) - [ARP Poisoning](https://stratospherelinuxips.readthedocs.io/en/develop/immune/arp_poisoning.html) - [ARP Poisoning Risks](https://stratospherelinuxips.readthedocs.io/en/develop/immune/arp_poisoning_risks.html) - [Blocking with Slips as an Access Point](https://stratospherelinuxips.readthedocs.io/en/develop/immune/blocking_in_slips.html) - [IDS-in-the-middle Traffic routing](https://stratospherelinuxips.readthedocs.io/en/develop/immune/ids_in_the_middle_traffic_routing.html) +- [RPI Failover Mechanisms](https://stratospherelinuxips.readthedocs.io/en/develop/immune/failover_mechanisms.html) diff --git a/docs/immune/failover_mechanisms.md b/docs/immune/failover_mechanisms.md new file mode 100644 index 0000000000..f7d8ced637 --- /dev/null +++ b/docs/immune/failover_mechanisms.md @@ -0,0 +1,101 @@ +# Failover Mechanisms + +The project has a few failure points, listed below, that we explicitly want to control and try to recover from. + +Our goal, if something breaks, is to recover automatically where possible; if recovery is not possible or the failure is critical, the user must lose internet access so they are forced to debug and restart Slips manually instead of staying connected without Slips protection. + + +All failure points are handled by the ```failover_handler.sh``` script located in ```StratosphereLinuxIPS/rpi_scripts/```. + +## Prerequisites + +- Raspberry Pi with Docker installed +- StratosphereLinuxIPS cloned (or just the rpi_scripts/ directory). +- Root access to the Raspberry Pi. +- A [running access point](https://stratospherelinuxips.readthedocs.io/en/develop/immune/installing_slips_in_the_rpi.html#protect-your-local-network-with-slips-on-the-rpi). + + +## How to use + +Run the following command from the Slips main directory as root: + +```bash +sudo ./rpi_scripts/failover_handler.sh , +``` + +**Where** + +- `````` is the name of the wifi interface used by the access point (e.g. ```wlan0```). + +- `````` is the name of the ethernet interface connected to the router (e.g. ```eth0```). + +**You should see output similar to the image below:** + +![](../images/immune/a8/failovers_script_output.jpg) + + +**Output:** + +The script will +- Log the Slips docker container status, the used command, and any errors to ```slips_container.log``` for debugging purposes.
This file should be checked in case you notice any issues with the AP or Slips. +- Start the Slips and iptables watcher services through systemd so they start automatically on reboot and on failure. +- Start Slips inside a docker container monitoring your ethernet and wifi interfaces. +- Mount your local ```StratosphereLinuxIPS/output``` to ```/StratosphereLinuxIPS/output``` inside the started Slips container so any output generated by Slips will be available on the host machine. + + + +## How Failovers are Handled + +Slips on the Raspberry Pi has 3 main failure points that we want to handle: +1. The access point dies +2. Slips dies +3. The Raspberry Pi reboots + +### The access point dies + +If the AP dies, clients get disconnected. There's nothing to handle here. Slips keeps monitoring the ethernet interface and when the AP is back up, clients can reconnect and Slips continues protecting them. + + +### Slips dies + +Failovers when Slips dies consist of: + +* **Iptables firewall rules persistence**: The iptables firewall rules are saved periodically by the systemd unit that watches for iptables changes and saves them using ```netfilter-persistent``` whenever a change is detected. +* **Shutting down the AP process**: If Slips crashes, we do not want the AP to keep running without Slips protection, so we intentionally shut down the access point so the user notices, debugs, and restarts Slips manually instead of staying connected without Slips protection. +* **Restarting Slips automatically through systemd:** Slips restarts automatically through systemd on failure and on reboot. +* **Logging Slips container status to a file for debugging**: Slips container logs are placed in ```slips_container.log``` for debugging purposes. + + + +### The Raspberry Pi reboots + +Failovers consist of: + +* **Iptables firewall rules persistence** +* **Automatic start of Slips service through systemd** + + +When the Pi reboots, we want Slips to start automatically, and we want the iptables rules added by Slips to persist. The automatic restart is handled by systemd through the generated ```slips.service``` file, and firewall persistence is handled using the custom iptables watcher through the generated ```iptables-watcher.service```. + + +Both units are generated, added to the user's ```/etc/systemd/system```, and started by the ```failover_handler.sh``` script. + +--- + +## File Descriptions + +All the files involved in failover mechanisms are placed in ```StratosphereLinuxIPS/rpi_scripts/``` and are described in the table below: + + +| File | What it does | +|-------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| failover_handler.sh | The central orchestrator that checks AP status, ensures root access, prepares directories, sets up iptables persistence, builds the Slips runner script, generates the systemd unit, and enforces all failover behavior. This is the piece that links every component together and decides how the system should react when something breaks. | +| iptables_autosave/check-iptables-hash.sh | Keeps track of the hash of the current iptables rules and triggers a save when a change is detected.
| +
| iptables_autosave/iptables-watcher.service | The systemd service that runs check-iptables-hash.sh (because the timer cannot run the script directly); it is triggered by the iptables-watcher.timer every 10s to check for iptables changes. |
| iptables_autosave/iptables-watcher.timer | A systemd timer that periodically runs iptables-watcher.service so iptables rule changes are captured and saved automatically. |
| slips_container.log | A runtime log collecting Docker container output, commands, and status. Useful for investigating restarts, failures, or unexpected behavior. |
| slips-runner-template.sh | The script that launches the Slips container and starts Slips inside it in a tmux session. This runner keeps the container up as long as Slips is running. |
| slips.service.template | The systemd unit that starts Slips on reboot and on failure; it runs slips-runner-template.sh. |
+ + +--- diff --git a/docs/immune/ids_in_the_middle_traffic_routing.md b/docs/immune/ids_in_the_middle_traffic_routing.md index 427f821dd8..04908c58b8 100644 --- a/docs/immune/ids_in_the_middle_traffic_routing.md +++ b/docs/immune/ids_in_the_middle_traffic_routing.md @@ -1,11 +1,11 @@ # Table Of Contents * [IDS-in-the-middle Traffic Routing](#ids-in-the-middle-traffic-routing) * [Problem Statement](#problem-statement) -* [Researched Solutions](#researched-solutions) - * [Using Bridge mode instead of NAT](#using-bridge-mode-instead-of-nat-for-the-rpi-access-point-) +* [Solutions That Didn't Work](#solutions-that-didn-t-work) + * [Using Bridge mode instead of NAT for the RPI Access Point:](#using-bridge-mode-instead-of-nat-for-the-rpi-access-point-) * [Port Mirroring](#port-mirroring) * [A Zeek Cluster with two workers monitoring each interface](#a-zeek-cluster-with-two-workers-monitoring-each-interface) -* [Working Solution](#working-solution) +* [Working Solution Currently Implemented in Slips](#working-solution-currently-implemented-in-slips) * [Usage](#usage) * [Related Links](#related-links) @@ -36,7 +36,7 @@ attack (eth0 in this case) and it doesn't have access to the attacker's IP. This makes out goal is to find a way to access and monitor both interfaces (wlan0 and eth0) when Slips is running as an access point in the RPI. -# Researched Solutions +# Solutions That Didn't Work Here we list the researched solutions to monitor both interfaces in Slips, they all manage to give slips access to the network traffic from both interfaces, but they all have their limitations that made us discard them. @@ -68,10 +68,13 @@ causing frequent reconnection of AP clients, client failing to get an IP address minutes, which is too long to be practical for Slips users. -# Working Solution +# Working Solution Currently Implemented in Slips -Support monitoring 2 interfaces and start 2 instances of Zeek (without a cluster), -one monitoring each interface (wlan0 and eth0). +The following solution is the one that works and integrates seamlessly into Slips. + +**Goal:** +Monitor 2 interfaces and start 2 instances of Zeek (without a cluster), +with one zeek instance monitoring each interface (wlan0 and eth0). **Pros:** - Integrates well with the current Slips architecture. @@ -83,9 +86,12 @@ Zeek instances resulting in duplicate zeek logs. **Workaround:** -- For solving the duplicate traffic issue, thanks to the flexibility of zeek, we added a filter to show incoming traffic only -in the logs produced by the zeek instance monitoring the ethernet interface.
+- To solve the duplicate traffic issue, thanks to the flexibility of Zeek, we added a Zeek filter to show the +traffic incoming from the router's network to the RPI in the logs produced by the Zeek instance monitoring the +ethernet interface (the interface responsible for receiving traffic from the router). +- The Zeek filter used is simply `dst net ` added to the CLI args of the Zeek instance monitoring +the ethernet interface (eth0 in the graph above) so that it only logs traffic incoming to the local network. # Usage diff --git a/docs/immune/reimplement_slips_features_incompatible_with_the_rpi.md b/docs/immune/reimplement_slips_features_incompatible_with_the_rpi.md index 7f3f5dbf55..a14de848f8 100644 --- a/docs/immune/reimplement_slips_features_incompatible_with_the_rpi.md +++ b/docs/immune/reimplement_slips_features_incompatible_with_the_rpi.md @@ -24,8 +24,8 @@ Docker can run amd64 images on arm devices using emulation, but this method come So we decided to go for an ARM docker image specifically for ARM devices and the RPI. This is doable without maintaining 2 different Dockerfiles one for each architechture thanks to docker buildx multiplatform support. -**Commands for building the ARM image** - +**Commands for building one multi-architecture image (supporting AMD and ARM)** +``` docker buildx create --name slips\_builder docker buildx use slips\_builder @@ -33,6 +33,7 @@ docker buildx use slips\_builder export BUILDKIT\_CONTAINERD=1 docker buildx build --platform linux/amd64,linux/arm64 -t stratosphereips/slips:latest -f docker/Dockerfile --push . +``` Our goal is to maintain one Dockerfile that is able to run on both ARM and AMD architectures. diff --git a/docs/immune/summary_dataset.md b/docs/immune/summary_dataset.md new file mode 100644 index 0000000000..6d6a5b68e9 --- /dev/null +++ b/docs/immune/summary_dataset.md @@ -0,0 +1,174 @@ +# Network Event Summarization Dataset for Slips IDS + +## Table of Contents + +- [1. Task description](#1-task-description) +- [2. Limitations](#2-limitations) + - [Hardware Constraints](#hardware-constraints) + - [Scope Constraints](#scope-constraints) +- [3. Dataset Generation Workflow](#3-dataset-generation-workflow) + - [Stage 1: Incident Sampling](#stage-1-incident-sampling) + - [Stage 2: Structural Analysis](#stage-2-structural-analysis) + - [Stage 3: Multi-Model LLM Analysis](#stage-3-multi-model-llm-analysis) + - [Stage 4: Dataset Correlation](#stage-4-dataset-correlation) + - [Dataset Extension](#dataset-extension) + - [Workflow Diagram](#workflow-diagram) + - [Event Grouping Strategy](#event-grouping-strategy) + - [Additional Optimizations](#additional-optimizations) + - [Dataset Structure](#dataset-structure) + +## 1. Task description + +Develop a dataset for network security event summarization to be integrated with the Slips Immune system, optimized for deployment on low-resource hardware such as the Raspberry Pi 5. This dataset will be used to fine-tune compact language models capable of generating concise and actionable summaries of security incidents from raw Slips alert data, enabling real-time threat analysis in resource-constrained environments. + +The current version of the dataset used for fine-tuning LLM models is available [here](https://github.com/stratosphereips/Slips-tools/raw/refs/heads/main/alert_summary/datasets/summarization_dataset_v3.json.gz) + + +## 2.
Limitations + +### Hardware Constraints +- **Platform**: Raspberry Pi 5 with limited RAM and processing power +- **Model Size**: Only small language models (1.5B-3B parameters) are viable on target hardware +- **Real-time Processing**: Target 10-15 seconds per incident on RPi5 with Ollama requires aggressive token optimization + +### Scope Constraints +- **Alert Format**: Analysis currently limited to Slips alert format; generalization to other IDS outputs requires format adaptation +- **Token Budget**: Input and output tokens must be minimized to enable real-time inference on resource-constrained hardware (~2000 tokens max) +- **Output Constraints**: Summaries must be concise (150-300 tokens) while maintaining security context + +## 3. Dataset Generation Workflow + +The dataset generation process consists of four stages, each implemented as Python scripts with shell wrappers that simplify execution, handle argument validation, and automate file naming. This modular design enables flexible experimentation with different models and configurations while maintaining reproducibility. + +**Detailed documentation**: See [summary_dataset_workflow.md](summary_dataset_workflow.md) for complete pipeline specifications and advanced usage. +The complete set of scripts for creating and updating the dataset is availabble in the [SLIPS tools repository](https://github.com/stratosphereips/Slips-tools/tree/main/alert_summary). + +### Stage 1: Incident Sampling +Extract security incidents from Slips `alerts.json` logs with category labels (Malware/Normal): + +```bash +./sample_dataset.sh 20 my_dataset --category malware --seed 42 +``` + +**Output**: `my_dataset.jsonl` (JSONL format with incidents and events) + +### Stage 2: Structural Analysis +Generate DAG-based chronological analysis of incident events: + +```bash +./generate_dag_analysis.sh my_dataset.jsonl +``` + +**Output**: `my_dataset.dag.json` (incident metadata + event timeline) + +### Stage 3: Multi-Model LLM Analysis +Query multiple language models with optimized prompts: + +```bash +# GPT-4o-mini (baseline) +./generate_llm_analysis.sh my_dataset.jsonl --model gpt-4o-mini \ + --group-events --behavior-analysis + +# Qwen2.5:3b (target model) +./generate_llm_analysis.sh my_dataset.jsonl --model qwen2.5:3b \ + --base-url http://10.147.20.102:11434/v1 --group-events --behavior-analysis + +# Qwen2.5:1.5b (minimal model) +./generate_llm_analysis.sh my_dataset.jsonl --model qwen2.5:1.5b \ + --base-url http://10.147.20.102:11434/v1 --group-events --behavior-analysis +``` + +**Outputs**: Model-specific JSON files with `summary` and `behavior_analysis` fields + +### Stage 4: Dataset Correlation +Merge all analyses into unified dataset by incident ID: + +```bash +python3 correlate_incidents.py my_dataset.*.json \ + --jsonl my_dataset.jsonl -o final_dataset.json +``` + +**Output**: `final_dataset.json` (consolidated dataset with all analyses) + +### Dataset Extension + +To expand existing datasets without regeneration, use `merge_datasets.py` to combine multiple correlated datasets with automatic deduplication: + +```bash +# Generate new samples with different seed +./sample_dataset.sh 20 extension --category malware --seed 99 + +# Run full analysis pipeline on extension +./generate_dag_analysis.sh extension.jsonl +./generate_llm_analysis.sh extension.jsonl --model qwen2.5:3b --group-events --behavior-analysis + +# Correlate extension data +python3 correlate_incidents.py extension.*.json --jsonl extension.jsonl -o extension_dataset.json + +# Merge with 
existing dataset (removes duplicates by incident_id) +python3 merge_datasets.py final_dataset.json extension_dataset.json -o final_dataset_v2.json +``` + +This approach enables incremental dataset growth while maintaining consistency across all analysis fields. + +### Workflow Diagram + +``` +Raw Slips Logs (alerts.json) + ↓ +[sample_dataset.py] → incidents.jsonl + ↓ + ├─→ [alert_dag_parser.py] → incidents.dag.json + ├─→ [alert_dag_parser_llm.py + GPT-4o-mini] → incidents.llm.gpt-4o-mini.json + ├─→ [alert_dag_parser_llm.py + Qwen2.5:3b] → incidents.llm.qwen2.5.json + └─→ [alert_dag_parser_llm.py + Qwen2.5:1.5b] → incidents.llm.qwen2.5.1.5b.json + ↓ +[correlate_incidents.py] → final_dataset.json +``` + +### Event Grouping Strategy + +The `--group-events` optimization reduces token count through pattern normalization: + +1. **Pattern Normalization**: Replaces variable components in event descriptions with placeholders + - IPv4 addresses → `` + - Port numbers → `` (handles formats: `443/TCP`, `port: 80`) + - Standalone numbers → `` + +2. **Pattern-Based Grouping**: Groups events with identical normalized patterns + - Example: "Connection to 192.168.1.5:443" + "Connection to 10.0.2.15:443" → single pattern "Connection to ``:``" + - Preserves count, time range, and sample values (first 5 unique IPs/ports) per group + +3. **Token Reduction**: + - 103 events: 3,522 → 976 tokens (72% reduction) + - 4,604 events: ~50,000 → 1,897 tokens (96% reduction) + +4. **Information Loss Analysis**: + - **Lost**: Individual timestamps (only ranges), complete IP/port lists (max 5 samples), exact event sequence, duplicate frequency tracking + - **Retained**: Semantic patterns, event counts, representative samples, temporal context, protocol details, attack patterns + - **Impact**: Small incidents (~28% loss), large incidents (~90-95% loss, mostly repetitive data) + - **Justification**: Enables LLM summarization on RPi5; alternative is inability to process large incidents + +### Additional Optimizations + +**Dual-Prompt Analysis** (`--behavior-analysis`): Generates both severity-filtered summaries and structured technical flow analysis, providing richer training signals for model fine-tuning. + +**Severity Filtering Strategy**: The dual-prompt approach implements intelligent filtering to manage token budgets: +- Prioritizes high-threat evidence in summaries for focused incident assessment +- May omit low-confidence events to reduce token consumption +- Balanced by generating both severity-filtered summaries and comprehensive behavior analysis +- Trade-off: Enables complete incident coverage while maintaining concise outputs suitable for resource-constrained deployment + +**Multi-Model Evaluation**: Compares GPT-4o (quality baseline), GPT-4o-mini, Qwen2.5:3b (target deployment), and Qwen2.5:1.5b (minimal viable model) to assess performance-resource trade-offs. + +### Dataset Structure + +Each incident in the final dataset contains: +- **Metadata**: incident_id, category, source_ip, timewindow, threat_level +- **DAG Analysis**: Chronological event timeline with threat scores +- **LLM Summaries**: Model-specific severity assessments +- **Behavior Analysis**: Structured network flow descriptions + +Token efficiency enables deployment on Raspberry Pi 5 while maintaining security analysis quality suitable for real-time intrusion detection. 
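
As a concrete illustration of the Event Grouping Strategy described above (a hypothetical sketch, not the actual implementation in `alert_dag_parser_llm.py`), pattern normalization and grouping can be written like this:

```python
import re
from collections import defaultdict

IP_RE = re.compile(r"\b\d{1,3}(?:\.\d{1,3}){3}\b")
PORT_RE = re.compile(r"\b\d{1,5}/(?:TCP|UDP)\b|port:\s*\d+", re.IGNORECASE)
NUM_RE = re.compile(r"\b\d+\b")


def normalize(description):
    """Replace variable parts of an event description with placeholders."""
    description = IP_RE.sub("<IP>", description)
    description = PORT_RE.sub("<PORT>", description)
    return NUM_RE.sub("<NUM>", description)


def group_events(events):
    """Group events sharing the same normalized pattern, keeping a count
    and up to 5 representative samples per group."""
    groups = defaultdict(lambda: {"count": 0, "samples": []})
    for event in events:
        group = groups[normalize(event)]
        group["count"] += 1
        if len(group["samples"]) < 5:
            group["samples"].append(event)
    return dict(groups)


print(group_events([
    "Connection to 192.168.1.5:443/TCP",
    "Connection to 10.0.2.15:443/TCP",
]))
# {'Connection to <IP>:<PORT>': {'count': 2, 'samples': [...]}}
```

The real pipeline additionally preserves time ranges and threat levels per group, as listed in the information-loss analysis above.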
+ + diff --git a/docs/immune/summary_dataset_workflow.md b/docs/immune/summary_dataset_workflow.md new file mode 100644 index 0000000000..f986837c93 --- /dev/null +++ b/docs/immune/summary_dataset_workflow.md @@ -0,0 +1,311 @@ +# Dataset Generation Pipeline for Slips Alert Analysis + +## 1. Overview + +This pipeline transforms raw Slips security logs into structured multi-model analysis datasets. The workflow consists of four stages: (1) sampling incidents from raw logs into JSONL format, (2) generating DAG-based structural analysis, (3) producing LLM-enhanced summaries with behavior analysis from multiple models, and (4) correlating all analyses into a unified JSON dataset. The output provides comprehensive incident analysis from different analytical perspectives, enabling comparative evaluation of model performance on security analysis tasks. + +## 2. Pipeline Components + +### 2.1 Python Scripts + +**`sample_dataset.py`** +Samples INCIDENT alerts and their associated EVENT alerts from Slips `alerts.json` files. Preserves the complete event context for each incident by following CorrelID references. Supports filtering by category (normal/malware), severity (low/medium/high), and reproducible sampling via random seeds. Outputs JSONL format compatible with downstream analysis tools. + +**`alert_dag_parser.py`** +Parses JSONL incident files and generates Directed Acyclic Graph (DAG) analysis showing the chronological structure of security events. Extracts incident metadata (source IPs, timewindows, threat levels, timelines) and produces comprehensive event summaries. Outputs structured JSON with incident-level analysis. + +**`alert_dag_parser_llm.py`** +Generates LLM-enhanced analysis by querying language models with structured incident data. Implements two key optimizations: (1) event grouping by pattern normalization (replaces IPs, ports, numbers with placeholders to identify identical patterns), reducing token counts by 96-99% for large incidents, and (2) dual-prompt analysis generating both severity-assessed summaries and structured behavior explanations. Supports multiple LLM backends via OpenAI-compatible APIs. Outputs JSON with both `summary` and `behavior_analysis` fields. + +**`correlate_incidents.py`** +Merges multiple JSON analysis files by matching `incident_id` fields. Combines DAG analysis with multiple LLM analyses (from different models) into a single unified dataset. Automatically detects analysis types from filenames (e.g., `.dag.json`, `.llm.gpt-4o-mini.json`, `.llm.qwen2.5.json`) and creates appropriately named fields in the output. Produces consolidated JSON suitable for model comparison and evaluation. + +**`merge_datasets.py`** +Merges multiple correlated dataset JSON files into a single unified dataset. Removes duplicates based on `incident_id` while preserving all analysis fields from each incident. Useful for extending existing datasets by combining separately generated correlated datasets. Supports multiple input files, automatic deduplication, and optional compact output format. + +### 2.2 Shell Wrappers + +**`sample_dataset.sh`** +Wrapper for `sample_dataset.py` providing simplified command-line interface. Handles argument parsing, validation, and automatic file naming (appends `.jsonl` extension). Supports filtering options, random seed configuration, and optional statistics generation. + +**`generate_dag_analysis.sh`** +Wrapper for `alert_dag_parser.py` with automatic output filename generation based on input JSONL file. 
Converts `input.jsonl` to `input.dag.json` by default. Provides colored status logging and error handling. + +**`generate_llm_analysis.sh`** +Wrapper for `alert_dag_parser_llm.py` supporting multiple model configurations. Auto-generates output filenames incorporating model names (e.g., `input.llm.gpt-4o-mini.json`, `input.llm.qwen2.5.json`). Handles model endpoint configuration for both cloud APIs (OpenAI) and local servers (Ollama). Passes through optimization flags for event grouping and behavior analysis. + +## 3. Dataset Generation Workflow + +### 3.1 Prerequisites + +**Input Requirements:** +- Raw Slips logs: `alerts.json` files from Slips network security analysis +- Directory structure: `sample_logs/alya_datasets/{Normal,Malware}/...` + +**Model Configuration:** +- **GPT-4o-mini**: OpenAI API key in environment variable `OPENAI_API_KEY` +- **Qwen2.5:3b**: Ollama server running at `http://10.147.20.102:11434/v1` (adjust as needed) +- **Qwen2.5:1.5b**: Ollama server with model installed + +**Software Dependencies:** +- Python 3.6+ with standard library only (no external packages required) +- `bash`, `jq` for shell scripts +- OpenAI Python package for LLM analysis: `pip install openai` + +### 3.2 Step-by-Step Process + +**Step 1: Sample Incidents from Raw Logs** + +Generate a JSONL file containing sampled incidents with all associated events: + +```bash +./sample_dataset.sh 20 my_dataset --category malware --seed 42 --include-stats +``` + +This creates: +- `my_dataset.jsonl` - Sampled incidents and events in JSONL format +- `my_dataset.stats.json` - Statistics about the sample (optional) + +**Step 2: Generate DAG Analysis** + +Parse the JSONL file and generate structural DAG analysis: + +```bash +./generate_dag_analysis.sh my_dataset.jsonl +``` + +Output: `my_dataset.dag.json` - JSON array of incidents with DAG-based analysis + +**Step 3: Generate LLM Analysis (GPT-4o-mini)** + +Query GPT-4o-mini for enhanced analysis with event grouping and behavior analysis: + +```bash +./generate_llm_analysis.sh my_dataset.jsonl \ + --model gpt-4o-mini \ + --base-url https://api.openai.com/v1 \ + --group-events \ + --behavior-analysis +``` + +Output: `my_dataset.llm.gpt-4o-mini.json` - JSON array with `summary` and `behavior_analysis` fields + +**Step 4: Generate LLM Analysis (Qwen2.5:3b)** + +Query Qwen2.5:3b model via Ollama with same optimization flags: + +```bash +./generate_llm_analysis.sh my_dataset.jsonl \ + --model qwen2.5:3b \ + --base-url http://10.147.20.102:11434/v1 \ + --group-events \ + --behavior-analysis +``` + +Output: `my_dataset.llm.qwen2.5.json` - JSON array with model-specific analysis + +**Step 5: Generate LLM Analysis (Qwen2.5:1.5b)** + +Query Qwen2.5:1.5b model for comparison with smaller model: + +```bash +./generate_llm_analysis.sh my_dataset.jsonl \ + --model qwen2.5:1.5b \ + --base-url http://10.147.20.102:11434/v1 \ + --group-events \ + --behavior-analysis +``` + +Output: `my_dataset.llm.qwen2.5.1.5b.json` - JSON array from smaller model + +**Step 6: Correlate All Analyses** + +Merge all analysis files into a unified dataset by incident_id, including category information from the original JSONL: + +```bash +python3 correlate_incidents.py my_dataset.*.json --jsonl my_dataset.jsonl -o final_dataset.json +``` + +Output: `final_dataset.json` - Consolidated dataset with all analyses per incident + +**Note:** The `--jsonl` parameter is used to extract the category field (Malware/Normal) from the original sampled data, ensuring proper ground truth labeling in the final dataset. 
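
To make the correlation step concrete, here is an illustrative sketch of merging the per-model analysis files by `incident_id` (a simplified stand-in, not the actual `correlate_incidents.py`, which also handles the `--jsonl` category extraction and richer field naming):

```python
import json


def correlate(analysis_files, output_file):
    """Merge several analysis JSON arrays into one record per incident_id."""
    merged = {}
    for path in analysis_files:
        # derive a field name from the filename, e.g. "dag" or "llm.qwen2.5"
        field = path.split(".", 1)[1].rsplit(".json", 1)[0]
        with open(path) as f:
            for record in json.load(f):
                incident = merged.setdefault(
                    record["incident_id"], {"incident_id": record["incident_id"]}
                )
                incident[field + "_analysis"] = record
    with open(output_file, "w") as f:
        json.dump(list(merged.values()), f, indent=2)


# Example usage (assumes the analysis files from the previous steps exist):
# correlate(["my_dataset.dag.json", "my_dataset.llm.qwen2.5.json"],
#           "final_dataset.json")
```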
+ +### 3.3 Complete Workflow Example + +```bash +# Full pipeline execution +./sample_dataset.sh 20 my_dataset --category malware --seed 42 +./generate_dag_analysis.sh my_dataset.jsonl +./generate_llm_analysis.sh my_dataset.jsonl --model gpt-4o-mini --group-events --behavior-analysis +./generate_llm_analysis.sh my_dataset.jsonl --model qwen2.5:3b --base-url http://10.147.20.102:11434/v1 --group-events --behavior-analysis +./generate_llm_analysis.sh my_dataset.jsonl --model qwen2.5:1.5b --base-url http://10.147.20.102:11434/v1 --group-events --behavior-analysis +python3 correlate_incidents.py my_dataset.*.json --jsonl my_dataset.jsonl -o final_dataset.json +``` + +Files generated: +- `my_dataset.jsonl` - Sampled incidents (JSONL) +- `my_dataset.dag.json` - DAG analysis +- `my_dataset.llm.gpt-4o-mini.json` - GPT-4o-mini analysis +- `my_dataset.llm.qwen2.5.json` - Qwen2.5:3b analysis +- `my_dataset.llm.qwen2.5.1.5b.json` - Qwen2.5:1.5b analysis +- `final_dataset.json` - Unified correlated dataset + +### 3.4 Extending Existing Datasets + +To add more incidents to an existing correlated dataset without regenerating from scratch: + +**Step 1: Sample Additional Incidents** + +Use a different random seed to ensure new samples don't duplicate existing ones: + +```bash +./sample_dataset.sh 20 extension --category malware --seed 99 +``` + +**Step 2: Generate All Analyses for Extension** + +Run the full analysis pipeline on the new samples: + +```bash +./generate_dag_analysis.sh extension.jsonl +./generate_llm_analysis.sh extension.jsonl --model gpt-4o-mini --group-events --behavior-analysis +./generate_llm_analysis.sh extension.jsonl --model qwen2.5:3b --base-url http://10.147.20.102:11434/v1 --group-events --behavior-analysis +./generate_llm_analysis.sh extension.jsonl --model qwen2.5:1.5b --base-url http://10.147.20.102:11434/v1 --group-events --behavior-analysis +``` + +**Step 3: Correlate Extension Data** + +```bash +python3 correlate_incidents.py extension.*.json --jsonl extension.jsonl -o extension_dataset.json +``` + +**Step 4: Merge with Existing Dataset** + +Combine the original and extension datasets, automatically removing any duplicates: + +```bash +python3 merge_datasets.py final_dataset.json extension_dataset.json -o final_dataset_v2.json +``` + +**Alternative: Merge Multiple Extensions** + +If you have multiple extension datasets: + +```bash +python3 merge_datasets.py final_dataset.json extension1_dataset.json extension2_dataset.json -o combined_dataset.json +``` + +**Note on Deduplication:** The `merge_datasets.py` script automatically detects and removes duplicate incidents based on `incident_id`. If the same incident appears in multiple input files, only the first occurrence is kept. + +**Verification:** After merging, verify the operation completed successfully: + +```bash +python3 verify_merge.py --verbose +``` + +This validates file integrity, count accuracy, deduplication correctness, completeness, and data integrity. Use `--inputs` and `--output` flags to verify custom merge operations. + +## 4. 
Output Dataset Structure + +The final correlated dataset is a JSON array where each object represents one incident with all analyses: + +```json +[ + { + "incident_id": "bd47e95b-a211-41b1-9644-40d6a2e77a07", + "category": "Malware", + "source_ip": "10.0.2.15", + "timewindow": "12", + "timeline": "2024-04-05 16:53:07 to 16:53:50", + "threat_level": 15.36, + "event_count": 4604, + "dag_analysis": "Comprehensive analysis:\n- Source IP: 10.0.2.15\n- Timewindow: 12...", + "llm_gpt4o_mini_analysis": { + "summary": "Incident bd47e95b-a211-41b1-9644-40d6a2e77a07 involves...", + "behavior_analysis": "**Source:** 10.0.2.15\n**Activity:** Port scanning...\n**Detected Flows:**\n• 10.0.2.15 → 185.29.135.234:443/TCP (HTTPS)\n..." + }, + "llm_qwen2_5_3b_analysis": { + "summary": "This incident represents a sophisticated attack...", + "behavior_analysis": "**Source:** 10.0.2.15\n**Activity:** Multi-stage attack...\n..." + }, + "llm_qwen2_5_1_5b_analysis": { + "summary": "The incident shows malicious behavior with...", + "behavior_analysis": "**Source:** 10.0.2.15\n**Activity:** Network reconnaissance...\n..." + } + } +] +``` + +**Key Fields:** +- `incident_id`: UUID identifying the unique security incident +- `category`: Classification of the capture origin ("Malware" or "Normal") +- `source_ip`: Primary source IP address for the incident +- `timewindow`: Slips timewindow number for temporal context +- `timeline`: Human-readable time range (start to end) +- `threat_level`: Accumulated threat score from Slips +- `event_count`: Number of security events in this incident +- `dag_analysis`: Structural DAG-based analysis (string) +- `llm__analysis`: Object with `summary` and `behavior_analysis` strings + +**Analysis Field Contents:** + +*DAG Analysis:* Chronological event summary with threat levels, detection types, and temporal patterns. + +*LLM Summary:* Severity-assessed event descriptions prioritizing high-confidence and high-threat-level evidence. Groups similar events by pattern to reduce verbosity. + +*LLM Behavior Analysis:* Structured technical explanation formatted as: +``` +**Source:** +**Activity:** +**Detected Flows:** +• (service) +• [additional flows] + +**Summary:** [1-2 sentence technical summary] +``` + +## 5. Performance Considerations + +### Event Grouping (--group-events) + +**Purpose:** Reduce token count for large incidents to enable processing on low-specification devices. + +**Mechanism:** Normalizes event descriptions by replacing variable components (IP addresses → ``, ports → ``, numbers → ``) to identify identical patterns. Groups events with matching normalized patterns while preserving threat level and timing information. + +**Impact:** +- Small incident (103 events): 3,522 tokens → 976 tokens (72% reduction) +- Large incident (4,604 events): ~50,000 tokens → 1,897 tokens (96% reduction) + +**Trade-off:** Slight reduction in granularity (individual IPs/ports shown as samples) for massive token savings. Recommended for all production use. + +### Behavior Analysis (--behavior-analysis) + +**Purpose:** Generate structured technical explanations of network behavior alongside severity-assessed summaries. + +**Mechanism:** Issues two separate LLM queries per incident: +1. Summary prompt: Assesses severity and filters high-priority evidence +2. 
Behavior prompt: Produces structured flow analysis and technical summary + +**Impact:** +- Adds ~1,500 tokens per incident (behavior prompt) +- Doubles API calls and processing time per incident +- Provides richer analytical context for security analysts + +**Trade-off:** Enhanced analysis quality and readability at cost of increased processing time and API usage. Recommended for datasets under 100 incidents or when quality is prioritized over speed. + +### Combined Usage + +Using both flags together (`--group-events --behavior-analysis`) achieves optimal balance: +- Event grouping minimizes prompt size (token reduction) +- Behavior analysis maximizes output quality (richer insights) +- Large incidents become processable while maintaining analytical depth + +**Example token counts with both flags:** +- 4,604 events: 1,897 tokens (summary) + 1,527 tokens (behavior) = 3,424 total tokens +- Processing time: ~10-15 seconds per incident on low-spec devices (Ollama on Raspberry Pi) + +--- + +**Pipeline Maintained By:** Security Analysis Team +**Last Updated:** 2025-10-13 +**Version:** 2.0 (JSON-based workflow with event grouping and behavior analysis) diff --git a/docs/index.rst b/docs/index.rst index e983109be5..b65092bfd8 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -62,3 +62,5 @@ This documentation gives an overview how Slips works, how to use it and how to h FAQ contributing code_documentation + related_repos + visualisation diff --git a/docs/installation.md b/docs/installation.md index c6cc989259..a0e5742e62 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -203,26 +203,25 @@ You can read more about it [here](https://stratospherelinuxips.readthedocs.io/en ## Installing Slips natively -Slips is dependent on three major elements: +Slips depends on three major elements: - Python 3.10.12 -- Zeek +- Zeek 8.0.0 - Redis database 7.0.4 -To install these elements we will use APT package manager. After that, we will install python packages required for Slips to run and its modules to work. Also, Slips' interface Kalipso depend on Node.JS and several npm packages. +To install these elements, the script will use the APT package manager. After that, it will install python packages required for Slips to run and its modules to work. Also, Slips' interface Kalipso depend on Node JS and several npm packages. **Instructions to download everything for Slips are below.**
### Install Slips using shell script -You can install it using install.sh +You can install it using [install.sh](https://github.com/stratosphereips/StratosphereLinuxIPS/blob/master/install/install.sh) sudo chmod +x install.sh sudo ./install.sh - ### Installing Slips manually #### Installing Python, Redis, NodeJs, and required python and npm libraries. @@ -314,6 +313,7 @@ You can kill this redis database by running: ``` then choosing 1. +After these steps, if you need the submodules, you will need to clone them as done in the `install.sh` script. ## Installing Slips on a Raspberry PI diff --git a/docs/related_repos.md b/docs/related_repos.md new file mode 100644 index 0000000000..30ddd27d44 --- /dev/null +++ b/docs/related_repos.md @@ -0,0 +1,3 @@ +# Related Repositories + +- [Slips-tools](https://github.com/stratosphereips/Slips-tools): repository storing all the tools and scripts needed to test and evaluate Slips diff --git a/docs/usage.md b/docs/usage.md index de8211283b..d4cd642f4d 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -220,20 +220,19 @@ You can press enter to close all ports, then start slips again. Due to issues running Slips on an interface on MacOS, we added the option ```-g``` to run slips on a growing zeek directory -so you can run slips in docker, mount a into the container +so you can run Slips in docker, mount a into the container then start zeek inside it using the following command ``` bro -C -i tcp_inactivity_timeout=60mins tcp_attempt_delay=1min /slips/zeek-scripts``` -Then start slips on your zeek dir using -f normally, and mark the given dir as growing using -g +Then start Slips on your zeek dir using -g and specify the interface you're monitoring with -i. +```./slips.py -e 1 -g -i ``` -```./slips.py -e 1 -f zeek_dir/ -g``` - -By using the -g parameter, slips will treat your given zeek directory as growing (the same way we treat zeek -directories generated by using slips with -i) and will not stop when there -are no flows for a while. +By using the -g parameter, Slips will treat your given zeek directory as growing (the same way it treats zeek +directories generated by using slips with -i) and will keep waiting for flows unless stopped by the user. +NOTE: When using -g, it is mandatory to give Slips the same interface zeek is running on with -i. ## Reading the output @@ -383,6 +382,8 @@ This whitelist can be enabled or disabled by changing the ```enable_local_whitel The attacker and victim of every evidence are checked against the whitelist. In addition to all the related IPs, DNS resolutions, SNI, and CNAMEs of the attacker and teh victim. If any of them are whitelisted, the flow/evidence is discarded. +Whitelists now use bloom filters to speed up the process of checking if an IoC is whitelisted or not. + ### Flows Whitelist If you whitelist an IP address, Slips will check all flows and see if you are whitelisting to them or from them. @@ -469,28 +470,6 @@ The values for each column are the following: - Ignore alerts: slips reads all the flows, but it just ignores alerting if there is a match. - Ignore flows: the flow will be completely discarded. -### Removing values from the Whitelist - -Whitelisted IoCs can be updated: -1. When you re-start Slips -2.
On the fly while running Slips - -If you're updating the whitelist while Slips is running, be careful to use ; to comment out the lines you want to remove from the db -for example, if you have the following line in `whitelist.conf`: - -``` -organization,google,both,alerts -``` - -To be able to remove this whitelist entry while Slips is running, simply change it to - -``` -# organization,google,both,alerts -``` - -Comments starting with `;` are not removed from the database and are treated as user comments. -Comments starting with `#` will cause Slips to attempt to remove that entry from the database. - ## Popup notifications Slips Support displaying popup notifications whenever there's an alert. @@ -623,7 +602,7 @@ Check [rotation section](https://stratospherelinuxips.readthedocs.io/en/develop/ But you can also enable storing a copy of zeek log files in the output directory after the analysis is done by setting ```store_a_copy_of_zeek_files``` to yes, -or while zeek is stil generating log files by setting ```store_zeek_files_in_the_output_dir``` to yes. +or while zeek is still generating log files by setting ```store_zeek_files_in_the_output_dir``` to yes. this option stores a copy of the zeek files present in ```zeek_files/``` the moment slips stops. so this doesn't include deleted zeek logs. diff --git a/docs/web_visualization.md b/docs/web_visualization.md new file mode 100644 index 0000000000..a819d539ea --- /dev/null +++ b/docs/web_visualization.md @@ -0,0 +1,9 @@ +# Slips Web Visualization + +To see the alerts of Slips in a visual way, the methodology is the following: + +1. Slips must be configured to export the alerts in STIX format to a TAXII server, as explained in [exporting](https://stratospherelinuxips.readthedocs.io/en/develop/exporting.html). +2. You need to install a TAXII server (available in the SlipsWeb submodule folder). See its README.md. +3. Use the `SlipsWeb` program, available in the SlipsWeb submodule, which reads from the TAXII server. + +The whole setup does not consume many resources, so you can run this visualization even on small servers like a Raspberry Pi. However, by having many Slips instances export to the same server, you can centralize the visualization of many sensors in a single location, probably with more hardware if needed.
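
If you want to check what the visualizer will see, one way (an illustrative sketch, not part of SlipsWeb itself; the server URL, collection ID and credentials are placeholders) is to list the objects in the TAXII collection with `taxii2-client`:

```python
# List the STIX objects Slips pushed to the TAXII 2.1 collection,
# i.e. the same data the SlipsWeb visualizer reads. Placeholders below.
from taxii2client.v21 import Collection

collection = Collection(
    "http://<taxii-server>/<api-root>/collections/<collection-id>/",
    user="admin",
    password="<taxii-password>",
)
envelope = collection.get_objects()
for obj in envelope.get("objects", []):
    print(obj["type"], obj.get("name", obj["id"]))
```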
\ No newline at end of file diff --git a/fides b/fides index 8492d6cf21..70657740ce 160000 --- a/fides +++ b/fides @@ -1 +1 @@ -Subproject commit 8492d6cf216e0182b9f96d6ed6baffd3a4c41c24 +Subproject commit 70657740ce4daa3225b9f25bed7b1db00efb6b9f diff --git a/install/apt_dependencies.txt b/install/apt_dependencies.txt index bcf5363bcb..6620b9ea09 100644 --- a/install/apt_dependencies.txt +++ b/install/apt_dependencies.txt @@ -1,15 +1,21 @@ +curl +gnupg +ca-certificates python3 -redis-server python3-pip python3-certifi python3-dev +python3-watchdog +python3-tzlocal +wget +npm +iw build-essential file lsof net-tools iproute2 iptables -python3-tzlocal nfdump tshark git @@ -21,11 +27,5 @@ yara libnotify-bin wireless-tools arp-scan -python3-watchdog -curl -gnupg -ca-certificates +redis-server redis -wget -npm -iw diff --git a/install/install.sh b/install/install.sh index 0ed8aab374..821eea8dc3 100755 --- a/install/install.sh +++ b/install/install.sh @@ -94,7 +94,7 @@ ZEEK_REPO_URL="download.opensuse.org/repositories/security:/zeek/xUbuntu_${UBUNT # Add the repository to the sources list echo "deb http://${ZEEK_REPO_URL}/ /" | tee /etc/apt/sources.list.d/security:zeek.list \ && curl -fsSL "https://${ZEEK_REPO_URL}/Release.key" | gpg --dearmor | tee /etc/apt/trusted.gpg.d/security_zeek.gpg > /dev/null \ -&& sudo apt update && sudo apt install -y --no-install-recommends zeek +&& sudo apt update && sudo apt install -y --no-install-recommends --fix-missing zeek-8.0 # create a symlink to zeek so that slips can find it ln -s /opt/zeek/bin/zeek /usr/local/bin/bro diff --git a/install/requirements.txt b/install/requirements.txt index 4fb72c2a77..95bd849e41 100644 --- a/install/requirements.txt +++ b/install/requirements.txt @@ -8,18 +8,19 @@ pandas==2.3.3 tzlocal==5.3.1 cabby==0.1.23 stix2==3.0.1 -certifi==2025.8.3 +taxii2-client==2.3.0 +certifi==2025.10.5 tensorflow==2.16.1 Keras validators==0.35.0 ipwhois==1.2.0 -matplotlib==3.10.1 +matplotlib==3.10.7 scikit_learn slackclient==2.9.4 -psutil==7.1.2 +psutil==7.1.3 six==1.17.0 pytest==8.4.2 -pytest-mock==3.15.0 +pytest-mock==3.15.1 pytest-xdist==3.8.0 scipy==1.15.1 scikit-learn==1.7.2 @@ -30,16 +31,17 @@ pytest-dependency==0.6.0 whois==1.20240129.2 flask tldextract==5.3.0 -termcolor==3.1.0 +termcolor==3.2.0 yappi==1.7.3 pytest-sugar==1.1.1 aid_hash -black==24.10.0 -ruff==0.14.2 +black==25.9.0 +ruff==0.14.3 pre-commit==4.3.0 coverage==7.11.0 netifaces==0.11.0 scapy==2.6.1 +pybloom_live pyyaml pytest-asyncio vulture diff --git a/managers/ap_manager.py b/managers/ap_manager.py index 4b75977960..e5a7d6f60a 100644 --- a/managers/ap_manager.py +++ b/managers/ap_manager.py @@ -14,7 +14,7 @@ def store_ap_interfaces(self, input_information): """ stores the interfaces given with -ap to slips in the db """ - self.wifi_interface, self.eth_interface = input_information.split(",") + self.wifi_interface, self.eth_interface = input_information.split("_") interfaces = { "wifi_interface": self.wifi_interface, "ethernet_interface": self.eth_interface, diff --git a/managers/host_ip_manager.py b/managers/host_ip_manager.py index 20af9f8b1f..0a36c4692d 100644 --- a/managers/host_ip_manager.py +++ b/managers/host_ip_manager.py @@ -1,6 +1,5 @@ # SPDX-FileCopyrightText: 2021 Sebastian Garcia # SPDX-License-Identifier: GPL-2.0-only -import time import netifaces from typing import ( Set, @@ -8,7 +7,6 @@ Dict, ) -from slips_files.common.slips_utils import utils from slips_files.common.style import green @@ -17,44 +15,12 @@ def __init__(self, main): self.main = main 
self.info_printed = False - def _get_default_host_ip(self, interface) -> str | None: - """ - Return the host IP of the default interface (IPv4). - usefull when slips is running using -g and the user didn't supply - an interface, so we need to infer it - """ - try: - # Get the default gateway info (usually includes interface name) - addrs = netifaces.ifaddresses(interface) - # AF_INET is for IPv4 addresses - inet_info = addrs.get(netifaces.AF_INET) - if not inet_info: - return None - - return inet_info[0]["addr"] - except Exception as e: - print(f"Error getting host IP: {e}") - return None - def _get_host_ips(self) -> Dict[str, str]: """ tries to determine the machine's IP. uses the intrfaces provided by the user with -i or -ap returns a dict with {interface_name: host_ip, ..} """ - if self.main.args.growing: - # -g is used, user didn't supply the interface - # try to get the default interface - interface = utils.infer_used_interface() - if not interface: - return {} - - if default_host_ip := self._get_default_host_ip(interface): - return {interface: default_host_ip} - return {} - - # we use all interfaces when -g is used, otherwise we use the given - # interface interfaces: List[str] = ( [self.main.args.interface] if self.main.args.interface @@ -63,13 +29,23 @@ def _get_host_ips(self) -> Dict[str, str]: found_ips = {} for iface in interfaces: addrs = netifaces.ifaddresses(iface) - # check for IPv4 address - if netifaces.AF_INET not in addrs: - continue - for addr in addrs[netifaces.AF_INET]: - ip = addr.get("addr") - if ip and not ip.startswith("127."): - found_ips[iface] = ip + # we just need 1 host ip, v4 or v6, preferably v4 though + if netifaces.AF_INET in addrs: + for addr in addrs[netifaces.AF_INET]: + ip = addr.get("addr") + if ip and not ip.startswith("127."): + found_ips[iface] = ip + break + elif netifaces.AF_INET6 in addrs: + for addr in addrs[netifaces.AF_INET6]: + ip = addr.get("addr") + if ip: + try: + ip = ip.split("%")[0] + except KeyError: + pass + found_ips[iface] = ip + break return found_ips def store_host_ip(self) -> Dict[str, str] | None: @@ -86,15 +62,17 @@ def store_host_ip(self) -> Dict[str, str] | None: self.main.db.set_host_ip(ip, iface) if not self.info_printed: self.main.print( - f"Detected host IP: {green(ip)} for {green(iface)}" + f"Detected host IP: {green(ip)} for {green(iface)}\n" ) self.info_printed = True - return host_ips - self.main.print("Not Connected to the internet. Reconnecting in 10s.") - time.sleep(10) - self.store_host_ip() + # uncomment this if in the future we require host ips to start + # slips, then it will get stuck in a loop here until it's abl to + # get the host ip + # self.main.print("Not Connected to the internet. 
Reconnecting in 10s.") + # time.sleep(10) + # self.store_host_ip() def update_host_ip( self, host_ips: Dict[str, str], modified_profiles: Set[str] diff --git a/managers/process_manager.py b/managers/process_manager.py index 169322e627..92f19cf50c 100644 --- a/managers/process_manager.py +++ b/managers/process_manager.py @@ -39,6 +39,7 @@ from slips_files.common.style import green from slips_files.core.evidence_handler import EvidenceHandler +from slips_files.core.helpers.bloom_filters_manager import BFManager from slips_files.core.input import Input from slips_files.core.output import Output from slips_files.core.profiler import Profiler @@ -111,6 +112,7 @@ def start_profiler_process(self): self.main.args, self.main.conf, self.main.pid, + self.main.bloom_filters_man, is_profiler_done=self.is_profiler_done, profiler_queue=self.profiler_queue, is_profiler_done_event=self.is_profiler_done_event, @@ -134,6 +136,7 @@ def start_evidence_process(self): self.main.args, self.main.conf, self.main.pid, + self.main.bloom_filters_man, ) evidence_process.start() self.main.print( @@ -154,6 +157,7 @@ def start_input_process(self): self.main.args, self.main.conf, self.main.pid, + self.main.bloom_filters_man, is_input_done=self.is_input_done, profiler_queue=self.profiler_queue, input_type=self.main.input_type, @@ -399,6 +403,7 @@ def load_modules(self): self.main.args, self.main.conf, self.main.pid, + self.main.bloom_filters_man, ) module.start() self.main.db.store_pid(module_name, int(module.pid)) @@ -430,17 +435,30 @@ def print_stopped_module(self, module): f"\t{green(module)} \tStopped. " f"" f"{green(modules_left)} left." ) + def init_bloom_filters_manager(self): + """this instance is shared accross all slips IModule instances, + because we dont wanna re-create the filters once for each process, + this way is more memory efficient""" + return BFManager( + self.main.logger, + self.main.args.output, + self.main.redis_port, + self.main.conf, + self.main.pid, + ) + def start_update_manager(self, local_files=False, ti_feeds=False): """ starts the update manager process PS; this function is blocking, slips.py will not start the rest of the - module unless this functionis done + module unless this function's done :kwarg local_files: if true, updates the local ports and org files from disk :kwarg ti_feeds: if true, updates the remote TI feeds. PS: this takes time. """ try: + bloom_filters_man = getattr(self.main, "bloom_filters_man", None) # only one instance of slips should be able to update ports # and orgs at a time # so this function will only be allowed to run from 1 slips @@ -456,6 +474,7 @@ def start_update_manager(self, local_files=False, ti_feeds=False): self.main.args, self.main.conf, self.main.pid, + bloom_filters_man, ) if local_files: @@ -524,7 +543,6 @@ def get_hitlist_in_order(self) -> Tuple[List[Process], List[Process]]: pids_to_kill_last.append( self.main.db.get_pid_of("Exporting Alerts") ) - # remove all None PIDs. this happens when a module in that list # isnt started in the current run. 
pids_to_kill_last: List[int] = [ @@ -716,6 +734,9 @@ def kill_daemon_children(self): # and we only have access to the PIDs children = self.main.db.get_pids().items() for module_name, pid in children: + if "thread" in module_name.lower(): + # skip threads, they'll be handled by their parent process + continue self.kill_process_tree(int(pid)) self.print_stopped_module(module_name) diff --git a/modules/exporting_alerts/exporting_alerts.py b/modules/exporting_alerts/exporting_alerts.py index 9a527ad210..d341d2bbf6 100644 --- a/modules/exporting_alerts/exporting_alerts.py +++ b/modules/exporting_alerts/exporting_alerts.py @@ -24,6 +24,7 @@ def init(self): self.stix = StixExporter(self.logger, self.db) self.c1 = self.db.subscribe("export_evidence") self.channels = {"export_evidence": self.c1} + self.print("Subscribed to export_evidence channel.", 1, 0) def shutdown_gracefully(self): self.slack.shutdown_gracefully() @@ -35,18 +36,23 @@ def pre_main(self): export_to_slack = self.slack.should_export() export_to_stix = self.stix.should_export() + if not export_to_slack and not export_to_stix: + self.print( + "Exporting Alerts module disabled (no export targets configured).", + 0, + 2, + ) + return 1 + if export_to_slack: self.slack.send_init_msg() - if export_to_stix: + if export_to_stix and self.stix.is_running_non_stop: # This thread is responsible for waiting n seconds before # each push to the stix server # it starts the timer when the first alert happens self.stix.start_exporting_thread() - if not export_to_slack or export_to_stix: - return 1 - def remove_sensitive_info(self, evidence: dict) -> str: """ removes the leaked location co-ords from the evidence @@ -63,6 +69,12 @@ def main(self): # a msg is sent here for each evidence that was part of an alert if msg := self.get_msg("export_evidence"): evidence = json.loads(msg["data"]) + self.print( + f"[ExportingAlerts] Evidence {evidence.get('id')} " + f"type={evidence.get('evidence_type')} received.", + 2, + 0, + ) description = self.remove_sensitive_info(evidence) if self.slack.should_export(): srcip = evidence["profile"]["ip"] @@ -70,11 +82,7 @@ def main(self): self.slack.export(msg_to_send) if self.stix.should_export(): - msg_to_send = ( - evidence["evidence_type"], - evidence["attacker"]["value"], - ) - added_to_stix: bool = self.stix.add_to_stix_file(msg_to_send) + added_to_stix: bool = self.stix.add_to_stix_file(evidence) if added_to_stix: # now export to taxii self.stix.export() diff --git a/modules/exporting_alerts/stix_exporter.py b/modules/exporting_alerts/stix_exporter.py index 1833258983..3957053f71 100644 --- a/modules/exporting_alerts/stix_exporter.py +++ b/modules/exporting_alerts/stix_exporter.py @@ -1,10 +1,16 @@ # SPDX-FileCopyrightText: 2021 Sebastian Garcia # SPDX-License-Identifier: GPL-2.0-only -from stix2 import Indicator, Bundle -from cabby import create_client -import time -import threading +import json import os +import threading +import time +from datetime import datetime, timezone +from uuid import uuid4 +from typing import Dict, List, Optional +from urllib.parse import urljoin + +from stix2 import Bundle, Indicator, parse +from taxii2client.v21 import Server from slips_files.common.abstracts.iexporter import IExporter from slips_files.common.parsers.config_parser import ConfigParser @@ -15,113 +21,244 @@ class StixExporter(IExporter): def init(self): self.port = None self.is_running_non_stop: bool = self.db.is_running_non_stop() - self.stix_filename = "STIX_data.json" + self.output_dir = self._resolve_output_dir() + 
self.stix_filename = os.path.join(self.output_dir, "STIX_data.json") self.configs_read: bool = self.read_configuration() + self.export_to_taxii_thread = None + self.last_exported_count = 0 if self.should_export(): self.print( - f"Exporting to Stix & TAXII very " + f"Exporting alerts to STIX 2 / TAXII every " f"{self.push_delay} seconds." ) - # This bundle should be created once and we should - # append all indicators to it - self.is_bundle_created = False - # To avoid duplicates in STIX_data.json - self.added_ips = set() - self.export_to_taxii_thread = threading.Thread( - target=self.schedule_sending_to_taxii_server, - daemon=True, - name="stix_exporter_to_taxii_thread", - ) + self.exported_evidence_ids = set() + self.bundle_objects: List[Indicator] = [] + self.last_exported_count = 0 + self._load_existing_bundle() + self._ensure_bundle_file() + if self.is_running_non_stop: + self.export_to_taxii_thread = threading.Thread( + target=self.schedule_sending_to_taxii_server, + daemon=True, + name="stix_exporter_to_taxii_thread", + ) def start_exporting_thread(self): # This thread is responsible for waiting n seconds before # each push to the stix server # it starts the timer when the first alert happens - utils.start_thread(self.export_to_taxii_thread, self.db) + if self.export_to_taxii_thread: + utils.start_thread(self.export_to_taxii_thread, self.db) @property def name(self): return "StixExporter" - def create_client(self): - client = create_client( - self.TAXII_server, - use_https=self.use_https, - port=self.port, - discovery_path=self.discovery_path, - ) + def _base_url(self) -> str: + scheme = "https" if self.use_https else "http" + default_port = 443 if self.use_https else 80 + if self.port: + try: + port = int(self.port) + except (TypeError, ValueError): + port = None + if port and port != default_port: + return f"{scheme}://{self.TAXII_server}:{port}" + return f"{scheme}://{self.TAXII_server}" - if self.jwt_auth_path != "": - client.set_auth( - username=self.taxii_username, - password=self.taxii_password, - # URL used to obtain JWT token - jwt_auth_url=self.jwt_auth_path, - ) - else: - # User didn't provide jwt_auth_path in slips.yaml - client.set_auth( - username=self.taxii_username, - password=self.taxii_password, - ) - return client + def _build_url(self, path: str) -> str: + if not path: + return self._base_url() + if path.startswith("http://") or path.startswith("https://"): + return path + # urljoin discards url path if relative path does not start with / + adjusted = path if path.startswith("/") else f"/{path}" + return urljoin(self._base_url(), adjusted) - def inbox_service_exists_in_taxii_server(self, services): + def _resolve_output_dir(self) -> str: """ - Checks if inbox service is available in the taxii server + Determines the directory where STIX_data.json should be stored. + Falls back to the current working directory if the DB does not + have an output directory set yet. """ - for service in services: - if "inbox" in service.type.lower(): - return True + output_dir = getattr(self.db, "output_dir", None) + if not output_dir: + output_dir = self.db.get_output_dir() + if isinstance(output_dir, bytes): + output_dir = output_dir.decode("utf-8") + if not output_dir: + output_dir = os.getcwd() + output_dir = os.path.abspath(output_dir) + os.makedirs(output_dir, exist_ok=True) + return output_dir + + def _load_existing_bundle(self) -> None: + """ + Loads indicators from an existing STIX_data.json file so we can resume + without creating duplicates if Slips was restarted. 
+ """ + if not os.path.exists(self.stix_filename): + return + try: + with open(self.stix_filename, "r") as stix_file: + data = stix_file.read().strip() + except OSError as err: + self.print(f"Unable to read {self.stix_filename}: {err}", 0, 3) + return + + if not data: + return + + try: + bundle = parse(data, allow_custom=True) + except Exception as err: # stix2 raises generic Exception + self.print(f"Invalid STIX bundle, starting fresh: {err}", 0, 3) + return + + if not isinstance(bundle, Bundle): + self.print("STIX_data.json does not contain a bundle.", 0, 3) + return + + self.bundle_objects = list(bundle.objects) + self.last_exported_count = len(self.bundle_objects) + for indicator in self.bundle_objects: + evidence_id = self._extract_evidence_id(indicator) + if evidence_id: + self.exported_evidence_ids.add(evidence_id) + + def _ensure_bundle_file(self) -> None: + """ + Guarantee that STIX_data.json exists even before the first indicator + arrives so the user can inspect the file immediately. + """ + if os.path.exists(self.stix_filename): + return + bundle_stub = { + "type": "bundle", + "id": f"bundle--{uuid4()}", + "objects": [], + } + with open(self.stix_filename, "w") as stix_file: + json.dump(bundle_stub, stix_file, indent=2) + + def _extract_evidence_id(self, indicator: Indicator) -> Optional[str]: + try: + return indicator.get("x_slips_evidence_id") # type: ignore[index] + except AttributeError: + return None + + def _serialize_bundle(self) -> str: + bundle = Bundle(*self.bundle_objects, allow_custom=True) + return bundle.serialize(pretty=True) + + def _write_bundle(self) -> None: + if not self.bundle_objects: + self._ensure_bundle_file() + return + + with open(self.stix_filename, "w") as stix_file: + stix_file.write(self._serialize_bundle()) + + def create_collection(self): + if not self.collection_name: + self.print( + "collection_name is missing in slips.yaml; cannot export STIX.", + 0, + 3, + ) + return None + + discovery_url = self._build_url(self.discovery_path) + try: + server = Server( + discovery_url, + user=self.taxii_username or None, + password=self.taxii_password or None, + ) + except Exception as err: + self.print(f"Failed to connect to TAXII discovery: {err}", 0, 3) + return None + + if not server.api_roots: + self.print("TAXII server returned no API roots.", 0, 3) + return None + + for api_root in server.api_roots: + try: + for collection in api_root.collections: + if collection.id == self.collection_name: + return collection + if ( + hasattr(collection, "title") + and collection.title == self.collection_name + ): + return collection + except Exception as err: + self.print( + f"Could not list collections for API root {api_root.url}: {err}", + 0, + 3, + ) self.print( - "Server doesn't have inbox available. " - "Exporting STIX_data.json is cancelled.", + f"Collection '{self.collection_name}' was not found on the TAXII " + f"server.", 0, - 2, + 3, ) - return False + return None def read_stix_file(self) -> str: - with open(self.stix_filename) as stix_file: - stix_data = stix_file.read() - return stix_data + if not os.path.exists(self.stix_filename): + return "" + + with open(self.stix_filename, "r") as stix_file: + return stix_file.read() def export(self) -> bool: """ - Exports evidence/alerts to the TAXII server - Uses Inbox Service (TAXII Service to Support Producer-initiated - pushes of cyber threat information) to publish - our STIX_data.json file + Exports evidence/alerts to a TAXII 2.x collection by pushing the + STIX_data.json bundle as a TAXII envelope. 
""" - if not self.should_export: + if not self.should_export(): return False - client = self.create_client() + stix_data: str = self.read_stix_file() + if len(stix_data.strip()) == 0: + return False - # Check the available services to make sure inbox service is there - services = client.discover_services() - if not self.inbox_service_exists_in_taxii_server(services): + try: + bundle_dict = json.loads(stix_data) + except json.JSONDecodeError as err: + self.print(f"STIX_data.json is not valid JSON: {err}", 0, 3) return False - stix_data: str = self.read_stix_file() + objects = bundle_dict.get("objects") or [] + if not objects: + return False - # Make sure we don't push empty files - if len(stix_data) == 0: + new_objects = objects[self.last_exported_count :] + if not new_objects: return False - binding = "urn:stix.mitre.org:json:2.1" - # URI is the path to the inbox service we want to - # use in the taxii server - client.push( - stix_data, - binding, - collection_names=[self.collection_name], - uri=self.inbox_path, - ) + collection = self.create_collection() + if not collection: + return False + + envelope = {"objects": new_objects} + + try: + collection.add_objects(envelope) + except Exception as err: + self.print(f"Failed to push bundle to TAXII collection: {err}", 0, 3) + return False + + self.last_exported_count = len(objects) + self.print( - f"Successfully exported to TAXII server: " f"{self.TAXII_server}.", - 1, + f"Successfully exported {len(new_objects)} indicators to TAXII " + f"collection '{self.collection_name}'.", + 2, 0, ) return True @@ -134,7 +271,7 @@ def shutdown_gracefully(self): def should_export(self) -> bool: """Determines whether to export or not""" - return self.is_running_non_stop and "stix" in self.export_to + return "stix" in self.export_to def read_configuration(self) -> bool: """Reads configuration""" @@ -149,91 +286,161 @@ def read_configuration(self) -> bool: self.port = conf.taxii_port() self.use_https = conf.use_https() self.discovery_path = conf.discovery_path() - self.inbox_path = conf.inbox_path() # push_delay is only used when slips is running using -i self.push_delay = conf.push_delay() self.collection_name = conf.collection_name() self.taxii_username = conf.taxii_username() self.taxii_password = conf.taxii_password() - self.jwt_auth_path = conf.jwt_auth_path() # push delay exists -> create a thread that waits # push delay doesn't exist -> running using file not interface # -> only push to taxii server once before # stopping return True - def ip_exists_in_stix_file(self, ip): - """Searches for ip in STIX_data.json to avoid exporting duplicates""" - return ip in self.added_ips - def get_ioc_pattern(self, ioc_type: str, attacker) -> str: patterns_map = { "ip": f"[ip-addr:value = '{attacker}']", "domain": f"[domain-name:value = '{attacker}']", "url": f"[url:value = '{attacker}']", } - if ioc_type not in ioc_type: + pattern = patterns_map.get(ioc_type) + if not pattern: self.print(f"Can't set pattern for STIX. 
{attacker}", 0, 3) - return False - return patterns_map[ioc_type] + return "" + return pattern + + def _build_indicator_labels(self, evidence: dict) -> List[str]: + labels = [] + evidence_type = evidence.get("evidence_type") + if evidence_type: + labels.append(str(evidence_type).lower()) + threat_level = evidence.get("threat_level") + if threat_level: + labels.append(str(threat_level).lower()) + return labels + + def _build_valid_from(self, evidence: dict) -> Optional[datetime]: + timestamp = evidence.get("timestamp") + if not timestamp: + return None + try: + dt_obj = utils.convert_to_datetime(timestamp) + except Exception: + return None + if not utils.is_aware(dt_obj): + dt_obj = utils.convert_ts_to_tz_aware(dt_obj) + return dt_obj.astimezone(timezone.utc) - def add_to_stix_file(self, to_add: tuple) -> bool: + def _build_custom_properties( + self, evidence: dict, date_added: Optional[str] + ) -> Dict[str, object]: + victim = evidence.get("victim") or {} + attacker = evidence.get("attacker") or {} + timewindow = evidence.get("timewindow") or {} + profile = evidence.get("profile") or {} + + custom_properties: Dict[str, object] = { + "x_slips_evidence_id": evidence.get("id"), + "x_slips_threat_level": evidence.get("threat_level"), + "x_slips_profile_ip": profile.get("ip"), + "x_slips_timewindow": timewindow.get("number"), + "x_slips_attacker_direction": attacker.get("direction"), + "x_slips_attacker_ti": attacker.get("TI"), + "date_added": date_added, + } + + victim_value = victim.get("value") + if victim_value: + custom_properties["x_slips_victim"] = victim_value + + uids = evidence.get("uid") + if uids: + custom_properties["x_slips_flow_uids"] = uids + + dst_port = evidence.get("dst_port") + if dst_port: + custom_properties["x_slips_dst_port"] = dst_port + + src_port = evidence.get("src_port") + if src_port: + custom_properties["x_slips_src_port"] = src_port + + return { + key: value + for key, value in custom_properties.items() + if value not in (None, "", [], {}) + } + + def add_to_stix_file(self, evidence: dict) -> bool: """ Function to export evidence to a STIX_data.json file in the cwd. 
It keeps appending the given indicator to STIX_data.json until they're sent to the taxii server - msg_to_send is a tuple: (evidence_type,attacker) - evidence_type: e.g PortScan, ThreatIntelligence etc - attacker: ip of the attcker + evidence is a dictionary that contains the alert data """ - evidence_type, attacker = ( - to_add[0], - to_add[1], + attacker = (evidence.get("attacker") or {}).get("value") + if not attacker: + attacker = (evidence.get("profile") or {}).get("ip") + if not attacker: + attacker = (evidence.get("victim") or {}).get("value") + if not attacker: + self.print("Evidence missing attacker value; skipping.", 0, 3) + return False + + evidence_id = evidence.get("id") + if evidence_id and evidence_id in self.exported_evidence_ids: + self.print( + f"Evidence {evidence_id} already exported; skipping.", + 3, + 0, + ) + return False + + self.print( + f"Processing evidence {evidence_id or attacker} " + f"(profile={evidence.get('profile')}, attacker={evidence.get('attacker')})", + 2, + 0, ) - # Get the right description to use in stix - name = evidence_type + ioc_type = utils.detect_ioc_type(attacker) pattern: str = self.get_ioc_pattern(ioc_type, attacker) - # Required Indicator Properties: type, spec_version, id, created, - # modified , all are set automatically - # Valid_from, created and modified attribute will - # be set to the current time - # ID will be generated randomly - # ref https://docs.oasis-open.org/cti/stix/v2.1/os/stix-v2.1-os.html#_6khi84u7y58g + if not pattern: + self.print( + f"Unable to build STIX pattern for attacker {attacker}.", 0, 3 + ) + return False + + indicator_labels = self._build_indicator_labels(evidence) + valid_from = self._build_valid_from(evidence) + date_added = ( + valid_from.isoformat() + if isinstance(valid_from, datetime) + else datetime.utcnow().replace(tzinfo=timezone.utc).isoformat() + ) + custom_properties = self._build_custom_properties(evidence, date_added) + indicator = Indicator( - name=name, pattern=pattern, pattern_type="stix" + name=evidence.get("evidence_type", "Slips Alert"), + description=evidence.get("description"), + pattern=pattern, + pattern_type="stix", + valid_from=valid_from, + labels=indicator_labels or None, + allow_custom=True, + custom_properties=custom_properties or None, ) # the pattern language that the indicator pattern is expressed in. - # Create and Populate Bundle. - # All our indicators will be inside bundle['objects']. - bundle = Bundle() - if not self.is_bundle_created: - bundle = Bundle(indicator) - # Clear everything in the existing STIX_data.json - # if it's not empty - open(self.stix_filename, "w").close() - # Write the bundle. 
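As a quick standalone reference for the Indicator and Bundle calls used above, assuming the stix2 package from requirements.txt; the attacker IP, labels, and evidence id are made-up examples, not Slips output:

from stix2 import Bundle, Indicator

indicator = Indicator(
    name="HorizontalPortscan",
    description="Horizontal port scan to port 23/TCP",
    pattern="[ip-addr:value = '10.0.0.5']",
    pattern_type="stix",
    labels=["horizontalportscan", "high"],
    allow_custom=True,
    custom_properties={"x_slips_evidence_id": "example-evidence-id"},
)

bundle = Bundle(indicator, allow_custom=True)
print(bundle.serialize(pretty=True))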
- with open(self.stix_filename, "w") as stix_file: - stix_file.write(str(bundle)) - self.is_bundle_created = True - elif not self.ip_exists_in_stix_file(attacker): - # Bundle is already created just append to it - # r+ to delete last 4 chars - with open(self.stix_filename, "r+") as stix_file: - # delete the last 4 characters in the file ']\n}\n' so we - # can append to the objects array and add them back later - stix_file.seek(0, os.SEEK_END) - stix_file.seek(stix_file.tell() - 4, 0) - stix_file.truncate() - - # Append mode to add the new indicator to the objects array - with open(self.stix_filename, "a") as stix_file: - # Append the indicator in the objects array - stix_file.write(f",{str(indicator)}" + "]\n}\n") - - # Set of unique ips added to stix_data.json to avoid duplicates - self.added_ips.add(attacker) - self.print("Indicator added to STIX_data.json", 2, 0) + + self.bundle_objects.append(indicator) + self._write_bundle() + + if evidence_id: + self.exported_evidence_ids.add(evidence_id) + + self.print( + f"Indicator added to STIX bundle at {self.stix_filename}", 2, 0 + ) return True def schedule_sending_to_taxii_server(self): @@ -250,9 +457,6 @@ def schedule_sending_to_taxii_server(self): # new alerts in stix_data.json yet if os.path.exists(self.stix_filename): self.export() - # Delete stix_data.json file so we don't send duplicates - os.remove(self.stix_filename) - self.is_bundle_created = False else: self.print( f"{self.push_delay} seconds passed, " diff --git a/modules/flowalerts/conn.py b/modules/flowalerts/conn.py index 3edca73df1..b3eb701c2a 100644 --- a/modules/flowalerts/conn.py +++ b/modules/flowalerts/conn.py @@ -19,7 +19,7 @@ NOT_ESTAB = "Not Established" ESTAB = "Established" -SPECIAL_IPV6 = ("0.0.0.0", "255.255.255.255") +SPECIAL_IPV4 = ("0.0.0.0", "255.255.255.255") class Conn(IFlowalertsAnalyzer): @@ -41,8 +41,6 @@ def init(self): self.our_ips: List[str] = utils.get_own_ips(ret="List") self.input_type: str = self.db.get_input_type() self.multiple_reconnection_attempts_threshold = 5 - # we use this to try to detect if there's dns server that has a - # private ip outside of localnet def read_configuration(self): conf = ConfigParser() @@ -367,7 +365,7 @@ def _is_it_ok_for_ip_to_change(ip) -> bool: """Devices send flow as/to these ips all the time, the're not suspicious not need to detect them.""" # its ok to change ips from a link local ip to another private ip - return ip in SPECIAL_IPV6 or ipaddress.ip_address(ip).is_link_local + return ip in SPECIAL_IPV4 or ipaddress.ip_address(ip).is_link_local def check_device_changing_ips(self, twid, flow): """ @@ -713,12 +711,21 @@ def is_well_known_org(self, ip): return True return False - def _is_ok_to_connect_to_ip(self, ip: str) -> bool: + def _is_ok_to_connect_to_ip_outside_localnet(self, flow) -> bool: """ returns true if it's ok to connect to the given IP even if it's "outside the given local network" """ - return ip in SPECIAL_IPV6 or utils.is_localhost(ip) + for ip in (flow.saddr, flow.daddr): + ip_obj = ipaddress.ip_address(ip) + return ( + # because slips only knows about the ipv4 local networks + not validators.ipv4(ip) + or ip in SPECIAL_IPV4 + or not ip_obj.is_private + or ip_obj.is_loopback + or ip_obj.is_multicast + ) def _is_dns(self, flow) -> bool: return str(flow.dport) == "53" and flow.proto.lower() == "udp" @@ -736,26 +743,23 @@ def check_different_localnet_usage( If we are on 192.168.1.0/24 then detect anything coming from/to 10.0.0.0/8 :param what_to_check: can be 'srcip' or 'dstip' + PS: most changes here 
should be in + dns.py::check_different_localnet_usage() so remember to update both:D """ if self._is_dns(flow): # dns flows are checked fot this same detection in dns.py return - if self._is_ok_to_connect_to_ip( - flow.saddr - ) or self._is_ok_to_connect_to_ip(flow.daddr): + if self._is_ok_to_connect_to_ip_outside_localnet(flow): return ip_to_check = flow.saddr if what_to_check == "srcip" else flow.daddr ip_obj = ipaddress.ip_address(ip_to_check) - if not (validators.ipv4(ip_to_check) and utils.is_private_ip(ip_obj)): - return - own_local_network = self.db.get_local_network(flow.interface) if not own_local_network: # the current local network wasn't set in the db yet - # it's impossible to get here becaus ethe localnet is set before + # it's impossible to get here because the localnet is set before # any msg is published in the new_flow channel return diff --git a/modules/flowalerts/dns.py b/modules/flowalerts/dns.py index 6d74c2a27c..8136b85a44 100644 --- a/modules/flowalerts/dns.py +++ b/modules/flowalerts/dns.py @@ -16,6 +16,7 @@ from multiprocessing import Queue from threading import Thread, Event + from slips_files.common.abstracts.iflowalerts_analyzer import ( IFlowalertsAnalyzer, ) @@ -25,6 +26,9 @@ from slips_files.core.structures.evidence import Direction +SPECIAL_IPV4 = ("0.0.0.0", "255.255.255.255") + + class DNS(IFlowalertsAnalyzer): def init(self): self.read_configuration() @@ -636,6 +640,24 @@ def is_possible_dns_misconfiguration(self, ip_to_check, flow) -> bool: self.priv_ips_doing_dns_outside_of_localnet[flow.daddr] = 1 return True + def _is_ok_to_connect_to_ip_outside_localnet(self, flow) -> bool: + """ + returns true if it's ok to connect to the given IP even if it's + "outside the given local network" + """ + for ip in (flow.saddr, flow.daddr): + ip_obj = ipaddress.ip_address(ip) + return ( + # if the ip is the dns server that slips detected, + # it's ok to connect to it + ip == self.detected_dns_ip + or not validators.ipv4(ip) + or ip in SPECIAL_IPV4 + or not ip_obj.is_private + or ip_obj.is_loopback + or ip_obj.is_multicast + ) + def check_different_localnet_usage( self, twid, @@ -652,22 +674,17 @@ def check_different_localnet_usage( only checks connections to dst port 53/UDP. 
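Spelled out as a standalone predicate, the exemption shared by conn.py and dns.py (apart from the detected-DNS-server case) looks roughly like this; the helper name and sample IPs are illustrative:

import ipaddress
import validators

SPECIAL_IPV4 = ("0.0.0.0", "255.255.255.255")

def exempt_from_localnet_check(ip: str) -> bool:
    # IPs that are fine to see outside the detected local network:
    # non-IPv4 (Slips only tracks IPv4 local nets), any/broadcast,
    # public addresses, loopback, and multicast.
    ip_obj = ipaddress.ip_address(ip)
    return (
        not validators.ipv4(ip)
        or ip in SPECIAL_IPV4
        or not ip_obj.is_private
        or ip_obj.is_loopback
        or ip_obj.is_multicast
    )

for candidate in ("224.0.0.251", "10.0.0.1", "8.8.8.8", "127.0.0.1"):
    print(candidate, exempt_from_localnet_check(candidate))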
the rest are checked in conn.log """ - # if the ip is the dns server that slips detected, it's ok to - # connect to it - if ( - flow.saddr == self.detected_dns_ip - or flow.daddr == self.detected_dns_ip - ): - return - if not self._is_dns(flow): # the non dns flows are checked in conn.py return + if self._is_ok_to_connect_to_ip_outside_localnet(flow): + return + ip_to_check = flow.saddr if what_to_check == "srcip" else flow.daddr ip_obj = ipaddress.ip_address(ip_to_check) - if not (validators.ipv4(ip_to_check) and utils.is_private_ip(ip_obj)): + if not (utils.is_private_ip(ip_obj)): return if self.is_possible_dns_misconfiguration(ip_to_check, flow): diff --git a/modules/flowalerts/flowalerts.py b/modules/flowalerts/flowalerts.py index 49cb316849..9a59510659 100644 --- a/modules/flowalerts/flowalerts.py +++ b/modules/flowalerts/flowalerts.py @@ -29,7 +29,7 @@ class FlowAlerts(AsyncModule): def init(self): self.subscribe_to_channels() - self.whitelist = Whitelist(self.logger, self.db) + self.whitelist = Whitelist(self.logger, self.db, self.bloom_filters) self.dns = DNS(self.db, flowalerts=self) self.software = Software(self.db, flowalerts=self) self.notice = Notice(self.db, flowalerts=self) diff --git a/modules/ip_info/ip_info.py b/modules/ip_info/ip_info.py index 085349a957..e16aaf51e8 100644 --- a/modules/ip_info/ip_info.py +++ b/modules/ip_info/ip_info.py @@ -64,7 +64,7 @@ def init(self): "new_dns": self.c3, "check_jarm_hash": self.c4, } - self.whitelist = Whitelist(self.logger, self.db) + self.whitelist = Whitelist(self.logger, self.db, self.bloom_filters) self.is_running_non_stop: bool = self.db.is_running_non_stop() self.valid_tlds = whois.validTlds() self.is_running_in_ap_mode: bool = ( diff --git a/modules/p2ptrust/p2ptrust.py b/modules/p2ptrust/p2ptrust.py index 0f5befa032..5711503ba1 100644 --- a/modules/p2ptrust/p2ptrust.py +++ b/modules/p2ptrust/p2ptrust.py @@ -27,6 +27,8 @@ Direction, ) +LOCALHOST = "127.0.0.1" + def validate_slips_data(message_data: str) -> (str, int): """ @@ -158,6 +160,8 @@ def get_local_IP(self): def get_available_port(self) -> int: for port in range(32768, 65535): + if port == self.redis_port: + continue sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) try: sock.bind(("0.0.0.0", port)) @@ -195,30 +199,23 @@ def _configure(self): f"Did you include it in PATH?. Exiting process." 
) return - executable = [self.pigeon_binary] - port_param = ["-port", str(self.port)] - # if '-ip' in sys.argv: - # ip_to_listen_on = sys.argv[sys.argv.index('-ip')+1] - # host_param = ["-host", ip_to_listen_on ] - # print(f"P2P modules is listening on ip {ip_to_listen_on} port: {self.port}, using '-ip' parameter") - # else: - host_param = ["-host", self.host] + + params = { + "-port": str(self.port), + "-host": self.host, + "-key-file": self.pigeon_key_file, + "--redis-db": f"localhost:{self.redis_port}", + "-redis-channel-pygo": self.pygo_channel_raw, + "-redis-channel-gopy": self.gopy_channel_raw, + } self.print( - f"P2p is listening on {self.host} port {self.port} determined " - f"by p2p module" + f"P2P is listening on {self.host} port {self.port} " + f"(determined by p2p module)" ) + executable = [self.pigeon_binary] + [ + item for pair in params.items() for item in pair + ] - keyfile_param = ["-key-file", self.pigeon_key_file] - # rename_with_port_param = ["-rename-with-port", - # str(self.rename_with_port).lower()] - pygo_channel_param = ["-redis-channel-pygo", self.pygo_channel_raw] - gopy_channel_param = ["-redis-channel-gopy", self.gopy_channel_raw] - executable.extend(port_param) - executable.extend(host_param) - executable.extend(keyfile_param) - # executable.extend(rename_with_port_param) - executable.extend(pygo_channel_param) - executable.extend(gopy_channel_param) if self.create_p2p_logfile: outfile = open(self.pigeon_logfile, "+w") else: @@ -647,7 +644,7 @@ def main(self): # give the pigeon time to put the multiaddr in the db time.sleep(2) multiaddr = self.db.get_multiaddr() - self.print(f"You Multiaddress is: {multiaddr}") + self.print(f"You Multiaddress is: {multiaddr}\n") self.mutliaddress_printed = True except Exception: diff --git a/modules/threat_intelligence/threat_intelligence.py b/modules/threat_intelligence/threat_intelligence.py index 2c3773cf49..0c589b7f19 100644 --- a/modules/threat_intelligence/threat_intelligence.py +++ b/modules/threat_intelligence/threat_intelligence.py @@ -609,6 +609,10 @@ def is_valid_threat_level(self, threat_level): return threat_level in utils.threat_levels def parse_known_fp_hashes(self, fullpath: str): + """ + That file contains known FalsePositives of hashes to reduce the + amount of FP from TI files + """ fp_hashes = {} with open(fullpath) as fps: # skip comments diff --git a/modules/update_manager/update_manager.py b/modules/update_manager/update_manager.py index 96cb1b5ade..4fd7abc1bd 100644 --- a/modules/update_manager/update_manager.py +++ b/modules/update_manager/update_manager.py @@ -56,7 +56,7 @@ def init(self): self.loaded_ti_files = 0 # don't store iocs older than 1 week self.interval = 7 - self.whitelist = Whitelist(self.logger, self.db) + self.whitelist = Whitelist(self.logger, self.db, self.bloom_filters) self.slips_logfile = self.db.get_stdfile("stdout") self.org_info_path = "slips_files/organizations_info/" self.path_to_mac_db = "databases/macaddress-db.json" @@ -1484,10 +1484,16 @@ def update_local_whitelist(self): self.whitelist.update() def update_org_files(self): + """ + This func handles organizations whitelist files. 
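The pigeon parameter refactor above reduces to flattening a {flag: value} dict into an argv list; a tiny standalone illustration with placeholder values (the binary name, port, and channel names are not taken from the real configuration):

params = {
    "-port": "32768",
    "-host": "0.0.0.0",
    "-key-file": "pigeon.keys",
    "--redis-db": "localhost:6379",
    "-redis-channel-pygo": "p2p_pygo",
    "-redis-channel-gopy": "p2p_gopy",
}

# dict.items() yields (flag, value) pairs; unrolling them keeps each flag
# adjacent to its value in the final command line
executable = ["p2p4slips"] + [item for pair in params.items() for item in pair]
print(executable)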
+ It updates the local IoCs of every supported organization in the db + and initializes the bloom filters + """ for org in utils.supported_orgs: org_ips = os.path.join(self.org_info_path, org) org_asn = os.path.join(self.org_info_path, f"{org}_asn") org_domains = os.path.join(self.org_info_path, f"{org}_domains") + if self.check_if_update_org(org_ips): self.whitelist.parser.load_org_ips(org) @@ -1576,10 +1582,11 @@ def update_online_whitelist(self): # delete the old ones self.db.delete_tranco_whitelist() response = self.responses["tranco_whitelist"] + domains = [] for line in response.text.splitlines(): - domain = line.split(",")[1] - domain.strip() - self.db.store_tranco_whitelisted_domain(domain) + domain = line.split(",")[1].strip() + domains.append(domain) + self.db.store_tranco_whitelisted_domains(domains) self.mark_feed_as_updated("tranco_whitelist") diff --git a/p2p4slips b/p2p4slips index 73597762b5..9a9ade5d42 160000 --- a/p2p4slips +++ b/p2p4slips @@ -1 +1 @@ -Subproject commit 73597762b5970524828ba49141b91e6f32b9315e +Subproject commit 9a9ade5d4206362e0e9f4fcc948e9c77d524c498 diff --git a/rpi_scripts/failover_handler.sh b/rpi_scripts/failover_handler.sh new file mode 100755 index 0000000000..6fbfc5264e --- /dev/null +++ b/rpi_scripts/failover_handler.sh @@ -0,0 +1,172 @@ +#!/usr/bin/env bash + +set -euo pipefail + +RESET="\033[0m"; BOLD="\033[1m"; RED="\033[0;31m"; GREEN="\033[0;32m"; YELLOW="\033[0;33m"; BLUE="\033[0;34m" +echoc() { printf "%b\n" "$*"; } + +usage() { + cat <, + +-h Show help +, e.g. wlan0,eth0 + +Example: + $0 wlan0,eth0 + +This script will: + - Require root (re-exec with sudo if needed) + - Check for a running create_ap instance, and exit if not found + - Create ./output and ./config if missing + - Install iptables persistence & save iptables rules on any change. + - Run Slips inside Docker + tmux + - Log Slips & Docker status to slips_container.log + - Create a systemd unit for Slips for persistence +EOF +} + +ensure_root() { + if [ "$(id -u)" -ne 0 ]; then + echoc "${YELLOW}Root required, re-running with sudo...${RESET}" + # Flush output to terminal + sleep 0.1 + exec sudo bash "$0" "$@" + fi +} + +parse_interfaces() { + if [ "${1:-}" = "-h" ]; then usage; exit 0; fi + if [ $# -ne 1 ]; then usage; exit 1; fi + IFS=',' read -r WIFI_IF ETH_IF <<< "$1" || { echoc "${RED}Invalid format.${RESET}"; exit 1; } + [ -z "$WIFI_IF" ] || [ -z "$ETH_IF" ] && { echoc "${RED}Missing interface(s).${RESET}"; exit 1; } + echoc "${GREEN}Using WiFi interface: ${WIFI_IF}, Ethernet interface: ${ETH_IF}${RESET}" +} + +ensure_create_ap_is_running() { + echoc "${BLUE}Checking for running create_ap...${RESET}" + if ! 
pgrep -a create_ap | grep -E "\\b${WIFI_IF}\\b.*\\b${ETH_IF}\\b" >/dev/null 2>&1; then + echoc "${RED}create_ap is not running for ${WIFI_IF},${ETH_IF}.${RESET}" + echoc "${YELLOW}Run first:${RESET}" + echoc "${BOLD}sudo create_ap ${WIFI_IF} ${ETH_IF} rpi_wifi mysecurepassword -c 40${RESET}" + exit 1 + fi +} + + +create_directories() { + CWD="$(pwd -P)" + OUTPUT_DIR="$CWD/output" + echoc "${BLUE}\nChecking if ${OUTPUT_DIR} exists...${RESET} " + + if [ -d "$OUTPUT_DIR" ]; then + echoc "${GREEN}${OUTPUT_DIR} exists${RESET}" + else + mkdir -p "$OUTPUT_DIR" + echoc "${GREEN}Created ${OUTPUT_DIR} successfully.${RESET}" + fi + echoc "${GREEN}This script will mount ${OUTPUT_DIR} into the Docker container as Slips output directory in /StratosphereLinuxIPS/output.\n ${RESET}" +} + +setup_iptables_persistence() { + # Persistence here is a systemd unit that watches for iptables changes and saves them whenever a change is detected + echoc "${BLUE}Setting up iptables persistence...${RESET}" + + # Install required packages + apt-get update -y + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends iptables-persistent netfilter-persistent + + # Enable and start netfilter-persistent as fallback + systemctl enable netfilter-persistent || true + systemctl restart netfilter-persistent || true + netfilter-persistent save || iptables-save > /etc/iptables/rules.v4 || true + + # Deploy custom systemd units + UNIT_DIR="/etc/systemd/system" + SRC_DIR="$(pwd)/iptables_autosave" + + # copy each unit file in iptables_autosave to the systemd directory + for unit in iptables-watcher.service iptables-watcher.timer; do + if [[ -f "$SRC_DIR/$unit" ]]; then + cp -f "$SRC_DIR/$unit" "$UNIT_DIR/$unit" + chmod 644 "$UNIT_DIR/$unit" + echoc "${GREEN}Copied $unit to $UNIT_DIR${RESET}" + else + echoc "${RED}File $SRC_DIR/$unit not found, skipping.${RESET}" + fi + done + + + # Deploy the check-iptables-hash.sh script + cp -f "$SRC_DIR/check-iptables-hash.sh" "/usr/local/bin/check-iptables-hash.sh" + chmod +x /usr/local/bin/check-iptables-hash.sh + + + # Reload systemd, enable and start units + systemctl daemon-reload + systemctl enable iptables-watcher.service + systemctl enable iptables-watcher.timer + systemctl start iptables-watcher.timer + + echoc "${GREEN}Done setting up iptables persistence using iptables-watcher units.${RESET}" + echoc "${BOLD}You can check the status with: ${RESET} sudo systemctl status iptables-watcher.timer\n" +} + + +create_slips_runner_script() { + # Creates the slips-runner.sh script from template slips-runner-template.sh + RUNNER_PATH="/usr/local/bin/slips-runner.sh" + TEMPLATE="./slips-runner-template.sh" + LOG_FILE="${CWD}/slips_container.log" + + echoc "${BLUE}Creating runner script from template for slips systemd unit to use...${RESET}" + [ -f "$TEMPLATE" ] || { echoc "${RED}Template not found: $TEMPLATE${RESET}"; exit 1; } + echoc "PS: This Slips runner script doesn't start slips with the blocking modules enabled, modify the Slips command in ${TEMPLATE} + if you want to enable them and rerun this script for the changes to take effect." 
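The envsubst call above deliberately substitutes only an allow-list of variables so that runtime placeholders such as ${CONTAINER_NAME:-slips} survive into the generated runner; if this rendering ever needs to be done from Python instead, a rough equivalent could look like the following sketch (paths and values are placeholders):

from pathlib import Path
from string import Template

allowed = {
    "WIFI_IF": "wlan0",
    "ETH_IF": "eth0",
    "CWD": "/home/pi/StratosphereLinuxIPS/rpi_scripts",
    "LOG_FILE": "/home/pi/StratosphereLinuxIPS/rpi_scripts/slips_container.log",
}

template = Template(Path("slips-runner-template.sh").read_text())
# safe_substitute() leaves unknown $VAR / ${VAR} references untouched,
# mirroring the restricted variable list passed to envsubst above
Path("/usr/local/bin/slips-runner.sh").write_text(template.safe_substitute(allowed))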
+ export WIFI_IF ETH_IF CWD LOG_FILE + envsubst '$WIFI_IF $ETH_IF $CWD $LOG_FILE' < "$TEMPLATE" > "$RUNNER_PATH" + chmod +x "$RUNNER_PATH" + echoc "${GREEN}Runner created at $RUNNER_PATH.${RESET}" +} + +create_slips_systemd_unit() { + SERVICE_PATH="/etc/systemd/system/slips.service" + TEMPLATE="./slips.service.template" + + echoc "${BLUE}Creating slips systemd service from template ./slips.service.template ...${RESET}" + [ -f "$TEMPLATE" ] || { echoc "${RED}Template not found: $TEMPLATE${RESET}"; exit 1; } + + # Ensure all needed vars are exported for envsubst + export WIFI_IF ETH_IF CWD LOG_FILE + export OUTPUT_DIR="$CWD/output" + export CONFIG_DIR="$CWD/config" + export CONTAINER_NAME="slips" + export DOCKER_IMAGE="stratosphereips/slips:latest" + export RUNNER_PATH="/usr/local/bin/slips-runner.sh" + + envsubst < "$TEMPLATE" > "$SERVICE_PATH" + + systemctl daemon-reload + systemctl enable slips.service + systemctl restart slips.service + echoc "${GREEN}Slips systemd service installed and started.${RESET}" + echoc "${BOLD}You can check the status with: ${RESET} sudo systemctl status slips\n" +} + + +main() { + parse_interfaces "$@" + ensure_root "$@" + ensure_create_ap_is_running + + create_directories + setup_iptables_persistence + create_slips_runner_script + create_slips_systemd_unit + + echoc "${YELLOW}Slips is running inside tmux in Docker.${RESET}" + echoc "You can attach using: ${BOLD}docker exec -it slips${RESET}" + echoc "For container logs check: ${BOLD}${CWD}/slips_container.log${RESET}" +} + +main "$@" diff --git a/rpi_scripts/iptables_autosave/check-iptables-hash.sh b/rpi_scripts/iptables_autosave/check-iptables-hash.sh new file mode 100644 index 0000000000..a43e51b67e --- /dev/null +++ b/rpi_scripts/iptables_autosave/check-iptables-hash.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +HASH_FILE="/var/run/iptables.hash" + +# Get the current ruleset and hash it +CURRENT_HASH=$(/usr/sbin/iptables-save | sha256sum) + +# If the hash file doesn't exist, create it and exit +if [ ! -f "$HASH_FILE" ]; then + echo "$CURRENT_HASH" > "$HASH_FILE" + exit 0 +fi + +# Read the old hash +OLD_HASH=$(cat "$HASH_FILE") + +# Compare hashes +if [ "$CURRENT_HASH" != "$OLD_HASH" ]; then + # 1. Update the hash file with the new hash + echo "$CURRENT_HASH" > "$HASH_FILE" + + + # 2. Trigger the action service to reload rules + echo "Saving updated iptables rules." 
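check-iptables-hash.sh is small enough to read directly; for completeness, the same detect-and-save idea expressed as a Python sketch (it still requires root and an installed netfilter-persistent, exactly like the shell version):

import hashlib
import subprocess
from pathlib import Path

HASH_FILE = Path("/var/run/iptables.hash")

def current_ruleset_hash() -> str:
    rules = subprocess.run(
        ["iptables-save"], capture_output=True, check=True
    ).stdout
    return hashlib.sha256(rules).hexdigest()

def save_rules_if_changed() -> None:
    new_hash = current_ruleset_hash()
    old_hash = HASH_FILE.read_text().strip() if HASH_FILE.exists() else None
    if new_hash != old_hash:
        HASH_FILE.write_text(new_hash + "\n")
        print("Saving updated iptables rules.")
        # same persistence step as the shell script
        subprocess.run(["netfilter-persistent", "save"], check=False)

if __name__ == "__main__":
    save_rules_if_changed()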
+ netfilter-persistent save || iptables-save > /etc/iptables/rules.v4 || true + +else + # No changes, do nothing + : +fi diff --git a/rpi_scripts/iptables_autosave/iptables-watcher.service b/rpi_scripts/iptables_autosave/iptables-watcher.service new file mode 100644 index 0000000000..84d0a5ecb8 --- /dev/null +++ b/rpi_scripts/iptables_autosave/iptables-watcher.service @@ -0,0 +1,11 @@ +[Unit] +Description=Check for iptables rule changes +After=network.target + +[Service] +Type=oneshot +ExecStart=/usr/local/bin/check-iptables-hash.sh + + +[Install] +WantedBy=multi-user.target diff --git a/rpi_scripts/iptables_autosave/iptables-watcher.timer b/rpi_scripts/iptables_autosave/iptables-watcher.timer new file mode 100644 index 0000000000..1eb7e43196 --- /dev/null +++ b/rpi_scripts/iptables_autosave/iptables-watcher.timer @@ -0,0 +1,12 @@ +[Unit] +Description=Run iptables-watcher.service every 10 seconds + +[Timer] +# run the service 5 seconds after boot +OnBootSec=5s +# then run the iptables-watcher every 10 seconds periodically +OnUnitActiveSec=10s +Unit=iptables-watcher.service + +[Install] +WantedBy=timers.target diff --git a/rpi_scripts/slips-runner-template.sh b/rpi_scripts/slips-runner-template.sh new file mode 100644 index 0000000000..3eb674171a --- /dev/null +++ b/rpi_scripts/slips-runner-template.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash +# This template script is to be copied and edited with actual values for running Slips in AP mode +# it's used by the slips service unit to run slips inside a docker container on boot and on failure + + +set -euo pipefail + +WIFI_IF="${WIFI_IF}" +ETH_IF="${ETH_IF}" +CWD="${CWD}" +OUTPUT_DIR="$CWD/output" +#CONFIG_DIR="$CWD/config" +CONTAINER_NAME="${CONTAINER_NAME:-slips}" +DOCKER_IMAGE="${DOCKER_IMAGE:-stratosphereips/slips:latest}" +LOG_FILE="${LOG_FILE}" + +log() { printf "%b\n" "$(date -Iseconds) - $*" | tee -a "$LOG_FILE"; } + +remove_existing_container() { + # Removes existing slips container if found, because we'll be starting a new one with the same name + if docker ps -a --format '{{.Names}}' | grep -xq "$CONTAINER_NAME"; then + log "Container '$CONTAINER_NAME' already exists. Removing it." + docker rm -f "$CONTAINER_NAME" || true + fi +} + +kill_ap_process() { + PATTERN="create_ap.*\b$WIFI_IF\b.*\b$ETH_IF\b" + log "Searching for AP processes matching: $PATTERN" + + pids=$(ps -ef | grep -E "$PATTERN" | grep -v grep | awk '{print $2}' || true) + + if [ -n "$pids" ]; then + log "Found AP process(es): $pids" + kill -TERM $pids + if [ $? -eq 0 ]; then + log "Successfully sent TERM to AP process(es)." + else + log "Failed to kill AP process(es)." + fi + else + log "No AP processes found." + fi +} + + +main() { + # Removes existing slips containers, restarts slips docker, and monitors status + # Stops the AP process on slips container exit to cut the user's internet to notice that something went wrong with slips + # and his traffic is no longer being inspected. 
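kill_ap_process() above greps ps output for the create_ap instance bound to the two interfaces; if that match ever moves into Python, psutil (already pinned in requirements.txt) can do the same thing, sketched here with placeholder interface names:

import re
import signal

import psutil

WIFI_IF, ETH_IF = "wlan0", "eth0"   # placeholders for the real -ap interfaces
pattern = re.compile(rf"create_ap.*\b{WIFI_IF}\b.*\b{ETH_IF}\b")

for proc in psutil.process_iter(["pid", "cmdline"]):
    cmdline = " ".join(proc.info["cmdline"] or [])
    if pattern.search(cmdline):
        # same effect as the kill -TERM in kill_ap_process()
        proc.send_signal(signal.SIGTERM)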
+ log "=== Slips Runner Started ===" + + remove_existing_container + docker_cmd=( + docker run -d -it + -v "$OUTPUT_DIR":/StratosphereLinuxIPS/output/ + # -v "$CONFIG_DIR":/StratosphereLinuxIPS/config/ + --name "$CONTAINER_NAME" + --net=host + --cpu-shares 800 + --memory 8g + --memory-swap 8g + --shm-size 512m + --cap-add NET_ADMIN + "$DOCKER_IMAGE" + bash -c "tmux new -s slips './slips.py -ap $WIFI_IF,$ETH_IF'; tmux wait-for -S slips_done" + ) + + log "Starting Slips container using command: ${docker_cmd[*]}" + + # Execute + container_id=$("${docker_cmd[@]}") + + log "Container started: $container_id" + + + # This loop blocks forever until the docker container dies. + # qx matches exactly "slips" container + while docker inspect -f '{{.State.Running}}' "$CONTAINER_NAME" 2>/dev/null | grep -qx "true" ; do + sleep 10 + status=$(docker inspect -f '{{.State.Status}}' "$CONTAINER_NAME" 2>/dev/null || echo "unknown") + log "Container status: $status" + done + + + ### Docker exited. Termination Logic #### :D + exit_code=$(docker inspect --format='{{.State.ExitCode}}' "$container_id" 2>/dev/null || echo 0) + log "Exited with code $exit_code" + + + # we want the user to notice that slips is no longer protecting their traffic, so we kill the AP process to cut internet access + kill_ap_process + + netfilter-persistent save || true + log "Runner finished." +} + +main "$@" diff --git a/rpi_scripts/slips.service.template b/rpi_scripts/slips.service.template new file mode 100644 index 0000000000..f6bfbd830f --- /dev/null +++ b/rpi_scripts/slips.service.template @@ -0,0 +1,32 @@ +[Unit] +Description=Slips Docker Runner with tmux +After=network.target docker.service +Wants=docker.service + +[Service] +Type=simple +ExecStart=${RUNNER_PATH} +# this will be handled by the runner script, no need to handle it here too # TODO +# ExecStop=/bin/bash -c 'docker rm -f slips >/dev/null 2>&1 || true' + +# always restart the service if it exits, no matter why (clean exit, crash, non-zero exit, etc.). +Restart=always + +# wait 5 seconds before restarting the service after it stops. +RestartSec=5 + +#KillMode=process +# kills child processes of the ExecStart command on shutdown. +KillMode=control-group + +# wait up to 10mins for slips service to complete its ExecStart command (to start). why? because it may pull docker image, do apt update, etc. so this takes time. +TimeoutStartSec=600 + +# wait 5 mins for the script to shutdown +TimeoutStopSec=300 +User=root +StandardOutput=append:${CWD}/slips_container.log +StandardError=append:${CWD}/slips_container.log + +[Install] +WantedBy=multi-user.target diff --git a/slips.py b/slips.py index e1a39a707e..ce0cd18033 100755 --- a/slips.py +++ b/slips.py @@ -50,7 +50,7 @@ daemon_status: dict = Daemon(slips).stop() # it takes about 5 seconds for the stop_slips msg # to arrive in the channel, so give slips time to stop - time.sleep(3) + time.sleep(5) if daemon_status["stopped"]: print("Daemon stopped.") else: diff --git a/slips/daemon.py b/slips/daemon.py index 028ee7f9db..659d983ff5 100644 --- a/slips/daemon.py +++ b/slips/daemon.py @@ -203,13 +203,16 @@ def start(self): try: with Lock(name=self.daemon_start_lock): if self.pid is not None: - self.print( - "pidfile already exists. Daemon already " "running?" + to_print = ( + f"pidfile {self.pidfile} already exists. " + f"Daemon already running?" 
) + # to cli and to log file + self.print(to_print) + print(to_print) return self.print("Daemon starting...") - # Starts the daemon self.daemonize() @@ -251,10 +254,12 @@ def get_last_opened_daemon_info(self) -> Optional[Tuple[str]]: f" Slips won't be completely killed." ) - def killdaemon(self): - """Kill the damon process only (aka slips.py)""" + def _kill_self(self): + """Kill the daemon process only (aka slips.py)""" + # sending SIGINT to self.pid will only kill slips.py - # and the rest of it's children will be zombies + # and the rest of its children will be zombies + # sending SIGKILL to self.pid will only kill slips.py and the rest of # it's children will stay open in memory (not even zombies) try: @@ -279,6 +284,10 @@ def _is_running(self) -> bool: def stop(self): """Stops the daemon""" + # WARNING: this function shouldn't call shutdown_gracefully(). + # it should send a signal to the opened daemon via + # killdaemon(), and the + # opened daemon should call shutdown gracefully try: with Lock(name=self.daemon_stop_lock): @@ -288,7 +297,7 @@ def stop(self): "error": "Daemon is not running.", } - self.killdaemon() + self._kill_self() info: Tuple[str] = self.get_last_opened_daemon_info() if not info: @@ -304,31 +313,29 @@ def stop(self): self.stdout = "slips.log" self.logsfile = "slips.log" self.prepare_std_streams(output_dir) + self.logger = self.slips.proc_man.start_output_process( self.stderr, self.logsfile, stdout=self.stdout ) + self.slips.printer = Printer(self.logger, self.name) + self.db = DBManager( self.logger, output_dir, - port, + int(port), self.slips.conf, self.slips.pid, start_sqlite=False, flush_db=False, ) - self.slips.printer = Printer(self.logger, self.name) self.db.set_slips_mode("daemonized") self.slips.set_mode("daemonized", daemon=self) - # used in shutdown gracefully to print the name of the - # stopped file in slips.log + self.slips.input_information = self.db.get_input_file() self.slips.args = self.slips.conf.get_args() self.slips.db = self.db self.slips.proc_man.slips_logfile = self.logsfile - # WARNING: this function shouldn't call shutdown gracefully. - # it should send sigint to the opened daemon, and the - # opened daemon should call shutdown gracefully return { "stopped": True, "error": None, diff --git a/slips/main.py b/slips/main.py index 0b3104bb4b..7a80daefb3 100644 --- a/slips/main.py +++ b/slips/main.py @@ -27,6 +27,7 @@ from slips_files.common.slips_utils import utils from slips_files.common.style import green, yellow from slips_files.core.database.database_manager import DBManager +from slips_files.core.helpers.bloom_filters_manager import BFManager from slips_files.core.helpers.checker import Checker @@ -555,20 +556,10 @@ def start(self): self.profilers_manager.memory_profiler_init() if self.args.growing: - if self.input_type != "zeek_folder": - self.print( - f"Parameter -g should be used with " - f"-f not a {self.input_type} file. " - f"Ignoring -g. 
Analyzing {self.input_information} " - f"instead.", - verbose=1, - debug=3, - ) - else: - self.print( - f"Running on a growing zeek dir: {self.input_information}" - ) - self.db.set_growing_zeek_dir() + self.print( + f"Running on a growing zeek dir: " f"{self.args.growing}" + ) + self.db.set_growing_zeek_dir() # log the PID of the started redis-server # should be here after we're sure that the server was started @@ -596,6 +587,9 @@ def start(self): # if slips is given a .rdb file, don't load the # modules as we don't need them if not self.args.db: + self.bloom_filters_man: BFManager = ( + self.proc_man.init_bloom_filters_manager() + ) # update local files before starting modules # if wait_for_TI_to_finish is set to true in the config file, # slips will wait untill all TI files are updated before @@ -605,6 +599,10 @@ def start(self): ti_feeds=self.conf.wait_for_TI_to_finish(), ) self.print("Starting modules", 1, 0) + # initialize_filter must be called after the update manager + # is started, and before the modules start. why? because + # update manager updates the iocs that the bloom filters need + self.bloom_filters_man.initialize_filter() self.proc_man.load_modules() # give outputprocess time to print all the started modules time.sleep(0.5) diff --git a/slips_files/common/abstracts/imodule.py b/slips_files/common/abstracts/imodule.py index 73182aa618..8798fd9b94 100644 --- a/slips_files/common/abstracts/imodule.py +++ b/slips_files/common/abstracts/imodule.py @@ -11,6 +11,7 @@ Optional, ) from slips_files.common.printer import Printer +from slips_files.core.helpers.bloom_filters_manager import BFManager from slips_files.core.output import Output from slips_files.common.slips_utils import utils from slips_files.core.database.database_manager import DBManager @@ -38,6 +39,7 @@ def __init__( slips_args, conf, ppid: int, + bloom_filters_manager: BFManager, **kwargs, ): Process.__init__(self) @@ -53,6 +55,7 @@ def __init__( # used to tell all slips.py children to stop self.termination_event: Event = termination_event self.logger = logger + self.bloom_filters: BFManager = bloom_filters_manager self.printer = Printer(self.logger, self.name) self.db = DBManager( self.logger, self.output_dir, self.redis_port, self.conf, self.ppid diff --git a/slips_files/common/parsers/arg_parser.py b/slips_files/common/parsers/arg_parser.py index eeb903a47a..aa5b7d1b18 100644 --- a/slips_files/common/parsers/arg_parser.py +++ b/slips_files/common/parsers/arg_parser.py @@ -275,9 +275,11 @@ def parse_arguments(self): self.add_argument( "-g", "--growing", - action="store_true", + action="store", + metavar="", required=False, - help="Treat the given zeek directory as growing. eg. zeek dirs generated when running on an interface", + help="Treat the given zeek directory as growing. eg. 
" + "Zeek dirs generated when running on an interface", ) self.add_argument( "-w", diff --git a/slips_files/common/parsers/config_parser.py b/slips_files/common/parsers/config_parser.py index 4c8c6f29fd..c44e62547c 100644 --- a/slips_files/common/parsers/config_parser.py +++ b/slips_files/common/parsers/config_parser.py @@ -48,7 +48,7 @@ def get_config_file(self): def get_parser(self, help=False): return ArgumentParser( - usage="./slips.py -c [options] [file]", add_help=help + usage="./slips.py -c [options]", add_help=help ) def get_args(self): @@ -341,9 +341,6 @@ def discovery_path(self): "exporting_alerts", "discovery_path", False ) - def inbox_path(self): - return self.read_configuration("exporting_alerts", "inbox_path", False) - def push_delay(self): # 3600 = 1h delay = self.read_configuration("exporting_alerts", "push_delay", 3600) @@ -368,11 +365,6 @@ def taxii_password(self): "exporting_alerts", "taxii_password", False ) - def jwt_auth_path(self): - return self.read_configuration( - "exporting_alerts", "jwt_auth_path", False - ) - def long_connection_threshold(self): """ returns threshold in seconds @@ -644,7 +636,7 @@ def get_disabled_modules(self, input_type: str) -> list: to_ignore.append("p2ptrust") use_global_p2p = self.use_global_p2p() - if not (use_global_p2p and ("-i" in sys.argv or "-g" in sys.argv)): + if not (use_global_p2p and ("-i" in sys.argv)): to_ignore.append("fidesModule") to_ignore.append("irisModule") diff --git a/slips_files/common/slips_utils.py b/slips_files/common/slips_utils.py index 34ff026a56..1cccf87a9e 100644 --- a/slips_files/common/slips_utils.py +++ b/slips_files/common/slips_utils.py @@ -81,6 +81,7 @@ def __init__(self): self.alerts_format = "%Y/%m/%d %H:%M:%S.%f%z" self.local_tz = self.get_local_timezone() self.aid = aid_hash.AID() + self.used_inetrface = None def generate_uid(self): """Generates a UID similar to what Zeek uses.""" @@ -206,21 +207,6 @@ def get_interface_of_ip(self, ip, db, args) -> str: if ip_obj in ipaddress.IPv4Network(local_net): return interface - def infer_used_interface(self) -> str | None: - """for when the user is using -g and didnt give slips an interface""" - # PS: make sure you neveer run this when slips is given a file or a - # pcap - try: - gateways = netifaces.gateways() - default_gateway = gateways.get("default", {}) - if netifaces.AF_INET not in default_gateway: - return None - - interface = default_gateway[netifaces.AF_INET][1] - return interface - except KeyError: - return - def get_gateway_for_iface(self, iface: str) -> Optional[str]: """returns the default gateway for the given interface""" gws = netifaces.gateways() @@ -506,8 +492,6 @@ def get_all_interfaces(self, args) -> List[str]: return [args.interface] if args.access_point: return args.access_point.split(",") - if args.growing: - return [self.infer_used_interface()] return ["default"] @@ -551,7 +535,7 @@ def get_own_ips(self, ret="Dict") -> dict[str, list[str]] | list[str]: :kwarg ret: "Dict" or "List" and returns a list of all the ips combined if ret=List is given """ - if "-i" not in sys.argv and "-g" not in sys.argv: + if "-i" not in sys.argv: # this method is only valid when running on an interface return [] diff --git a/slips_files/core/database/database_manager.py b/slips_files/core/database/database_manager.py index c46671e90d..236d2085dc 100644 --- a/slips_files/core/database/database_manager.py +++ b/slips_files/core/database/database_manager.py @@ -10,7 +10,7 @@ Dict, ) -from modules.p2ptrust.trust.trustdb import TrustDB + from 
slips_files.common.printer import Printer from slips_files.common.slips_utils import utils from slips_files.core.database.redis_db.database import RedisDB @@ -52,6 +52,10 @@ def __init__( self.trust_db = None if self.conf.use_local_p2p(): + # import this on demand because slips light version doesn't + # include the P2P dir + from modules.p2ptrust.trust.trustdb import TrustDB + self.trust_db_path: str = self.init_p2ptrust_db() self.trust_db = TrustDB( self.logger, @@ -66,6 +70,7 @@ def __init__( self.sqlite = None if start_sqlite: self.sqlite = SQLiteDB(self.logger, output_dir, main_pid) + self.all_interfaces = utils.get_all_interfaces(self.conf.get_args()) def is_db_malformed(self, db_path: str) -> bool: try: @@ -326,9 +331,6 @@ def set_dns_resolution(self, *args, **kwargs): def set_domain_resolution(self, *args, **kwargs): return self.rdb.set_domain_resolution(*args, **kwargs) - def get_redis_server_pid(self, *args, **kwargs): - return self.rdb.get_redis_server_pid(*args, **kwargs) - def set_slips_mode(self, *args, **kwargs): return self.rdb.set_slips_mode(*args, **kwargs) @@ -365,8 +367,8 @@ def get_modified_tw(self, *args, **kwargs): def get_field_separator(self, *args, **kwargs): return self.rdb.get_field_separator(*args, **kwargs) - def store_tranco_whitelisted_domain(self, *args, **kwargs): - return self.rdb.store_tranco_whitelisted_domain(*args, **kwargs) + def store_tranco_whitelisted_domains(self, *args, **kwargs): + return self.rdb.store_tranco_whitelisted_domains(*args, **kwargs) def is_whitelisted_tranco_domain(self, *args, **kwargs): return self.rdb.is_whitelisted_tranco_domain(*args, **kwargs) @@ -476,6 +478,9 @@ def get_pids(self, *args, **kwargs): def set_org_info(self, *args, **kwargs): return self.rdb.set_org_info(*args, **kwargs) + def set_org_cidrs(self, *args, **kwargs): + return self.rdb.set_org_cidrs(*args, **kwargs) + def get_org_info(self, *args, **kwargs): return self.rdb.get_org_info(*args, **kwargs) @@ -485,12 +490,21 @@ def get_org_ips(self, *args, **kwargs): def set_whitelist(self, *args, **kwargs): return self.rdb.set_whitelist(*args, **kwargs) - def get_all_whitelist(self, *args, **kwargs): - return self.rdb.get_all_whitelist(*args, **kwargs) - def get_whitelist(self, *args, **kwargs): return self.rdb.get_whitelist(*args, **kwargs) + def is_whitelisted(self, *args, **kwargs): + return self.rdb.is_whitelisted(*args, **kwargs) + + def is_domain_in_org_domains(self, *args, **kwargs): + return self.rdb.is_domain_in_org_domains(*args, **kwargs) + + def is_asn_in_org_asn(self, *args, **kwargs): + return self.rdb.is_asn_in_org_asn(*args, **kwargs) + + def is_ip_in_org_ips(self, *args, **kwargs): + return self.rdb.is_ip_in_org_cidrs(*args, **kwargs) + def has_cached_whitelist(self, *args, **kwargs): return self.rdb.has_cached_whitelist(*args, **kwargs) @@ -555,24 +569,38 @@ def get_flows_causing_evidence(self, *args, **kwargs): """returns the list of uids of the flows causing evidence""" return self.rdb.get_flows_causing_evidence(*args, **kwargs) - def _get_evidence_interface(self, evidence: Evidence) -> str | None: + def _get_evidence_interface(self, evidence: Evidence) -> str: """ Returns the interface of the first flow of the given evidence + PS: this function HAS TO return something, or else we wouldn't be + able to set an evidence without an interface. 
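set_org_cidrs() and is_ip_in_org_cidrs() are new delegations whose backing implementation lives in the redis handlers rather than in this hunk; the membership test they expose amounts to a check like the following sketch (the CIDR list is an illustrative sample, not data from slips_files/organizations_info/):

import ipaddress

# Illustrative sample CIDRs standing in for an organization's ranges
org_cidrs = ["8.8.8.0/24", "142.250.0.0/15"]

def ip_in_org_cidrs(ip: str, cidrs: list[str]) -> bool:
    addr = ipaddress.ip_address(ip)
    return any(addr in ipaddress.ip_network(cidr) for cidr in cidrs)

print(ip_in_org_cidrs("8.8.8.8", org_cidrs))   # hit
print(ip_in_org_cidrs("1.1.1.1", org_cidrs))   # miss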
if slips is + completely unable to return the used interface, it returns "default" """ - try: - # get any flow uid of this evidence, to get the interface of it - uid = evidence.uid[0] - except KeyError: - # evidence doesnt have a uid? - return - - try: - flow: str = self.get_flow(uid)[uid] - if isinstance(flow, str): - flow: dict = json.loads(flow) - except KeyError: - flow: dict = self.get_altflow_from_uid(uid) - return flow["interface"] if flow else None + # when slips is only monitoring 1 interface, must be it + if not self.all_interfaces: + self.interface = "default" + return "default" + + if len(self.all_interfaces) == 1: + return self.all_interfaces[0] + + elif len(self.all_interfaces) == 2: + # slips is running with -ap + try: + # get any flow uid of this evidence, to get the interface + # of it + uid = evidence.uid[0] + except (KeyError, IndexError, AttributeError): + # evidence doesnt have a uid? + return "default" + + try: + flow: str = self.get_flow(uid)[uid] + if isinstance(flow, str): + flow: dict = json.loads(flow) + except KeyError: + flow: dict = self.get_altflow_from_uid(uid) + return "default" if not flow else flow["interface"] def set_evidence(self, evidence: Evidence): interface: str | None = self._get_evidence_interface(evidence) diff --git a/slips_files/core/database/redis_db/alert_handler.py b/slips_files/core/database/redis_db/alert_handler.py index c4ead0f5fb..c9c747f794 100644 --- a/slips_files/core/database/redis_db/alert_handler.py +++ b/slips_files/core/database/redis_db/alert_handler.py @@ -53,7 +53,7 @@ def mark_profile_as_malicious(self, profileid: ProfileID): def get_malicious_profiles(self): """returns profiles that generated an alert""" - self.r.smembers(self.constants.MALICIOUS_PROFILES) + return self.r.smembers(self.constants.MALICIOUS_PROFILES) def set_evidence_causing_alert(self, alert: Alert): """ @@ -272,7 +272,7 @@ def set_evidence(self, evidence: Evidence): not self.is_whitelisted_evidence(evidence.id) ): self.r.hset(evidence_hash, evidence.id, evidence_to_send) - self.r.incr(self.constants.NUMBER_OF_EVIDENCE, 1) + self.r.incr(self.constants.NUMBER_OF_EVIDENCE) self.publish(self.channels.EVIDENCE_ADDED, evidence_to_send) return True @@ -310,7 +310,7 @@ def delete_evidence(self, profileid, twid, evidence_id: str): # which means that any evidence passed to this function # can never be a part of a past alert self.r.hdel(f"{profileid}_{twid}_evidence", evidence_id) - self.r.incr(self.constants.NUMBER_OF_EVIDENCE, -1) + self.r.decr(self.constants.NUMBER_OF_EVIDENCE) def cache_whitelisted_evidence_id(self, evidence_id: str): """ diff --git a/slips_files/core/database/redis_db/constants.py b/slips_files/core/database/redis_db/constants.py index 35b135fbd4..0f5ff8f5ed 100644 --- a/slips_files/core/database/redis_db/constants.py +++ b/slips_files/core/database/redis_db/constants.py @@ -34,7 +34,6 @@ class Constants: PIDS = "PIDs" MAC = "MAC" MODIFIED_TIMEWINDOWS = "ModifiedTW" - ORG_INFO = "OrgInfo" ACCUMULATED_THREAT_LEVELS = "accumulated_threat_levels" TRANCO_WHITELISTED_DOMAINS = "tranco_whitelisted_domains" WHITELIST = "whitelist" @@ -66,7 +65,7 @@ class Constants: BLOCKED_PROFILES_AND_TWS = "BlockedProfTW" PROFILES = "profiles" NUMBER_OF_ALERTS = "number_of_alerts" - KNOWN_FPS = "known_fps" + KNOWN_FP_MD5_HASHES = "known_fps" WILL_SLIPS_HAVE_MORE_FLOWS = "will_slips_have_more_flows" SUBS_WHO_PROCESSED_MSG = "number_of_subscribers_who_processed_this_msg" FLOWS_ANALYZED_BY_ALL_MODULES_PER_MIN = "flows_analyzed_per_minute" diff --git 
a/slips_files/core/database/redis_db/database.py b/slips_files/core/database/redis_db/database.py index 409e8d0343..8aa0f8dea8 100644 --- a/slips_files/core/database/redis_db/database.py +++ b/slips_files/core/database/redis_db/database.py @@ -1,5 +1,7 @@ # SPDX-FileCopyrightText: 2021 Sebastian Garcia # SPDX-License-Identifier: GPL-2.0-only +import socket + from slips_files.common.printer import Printer from slips_files.common.slips_utils import utils from slips_files.common.parsers.config_parser import ConfigParser @@ -13,7 +15,6 @@ from slips_files.core.database.redis_db.p2p_handler import P2PHandler import os -import signal import redis import time import json @@ -29,6 +30,7 @@ ) RUNNING_IN_DOCKER = os.environ.get("IS_IN_A_DOCKER_CONTAINER", False) +LOCALHOST = "127.0.0.1" class RedisDB(IoCHandler, AlertHandler, ProfileHandler, P2PHandler): @@ -60,7 +62,6 @@ class RedisDB(IoCHandler, AlertHandler, ProfileHandler, P2PHandler): "new_notice", "new_url", "new_downloaded_file", - "reload_whitelist", "new_service", "new_arp", "new_MAC", @@ -240,6 +241,20 @@ def get_slips_start_time(cls) -> str: """get the time slips started in unix format""" return cls.r.get(cls.constants.SLIPS_START_TIME) + @classmethod + def should_flush_db(cls) -> bool: + """ + these are the cases that we DO NOT flush the db when we + connect to it, because we need to use it + -d means Read an analysed file (rdb) from disk. + -S stop daemon + -cb clears the blocking chain + """ + will_need_the_db_later = ( + "-S" in sys.argv or "-cb" in sys.argv or "-d" in sys.argv + ) + return cls.deletePrevdb and cls.flush_db and not will_need_the_db_later + @classmethod def init_redis_server(cls) -> Tuple[bool, str]: """ @@ -252,23 +267,15 @@ def init_redis_server(cls) -> Tuple[bool, str]: # starts the redis server using cli. # we don't need that when using -k cls._start_a_redis_server() + all_good, err = cls._confirm_redis_is_listening() + if not all_good: + return False, err connected, err = cls.connect_to_redis_server() if not connected: return False, err - # these are the cases that we DO NOT flush the db when we - # connect to it, because we need to use it - # -d means Read an analysed file (rdb) from disk. 
- # -S stop daemon - # -cb clears the blocking chain - if ( - cls.deletePrevdb - and not ( - "-S" in sys.argv or "-cb" in sys.argv or "-d" in sys.argv - ) - and cls.flush_db - ): + if cls.should_flush_db(): # when stopping the daemon, don't flush bc we need to get # the PIDS to close slips files cls.r.flushdb() @@ -314,11 +321,35 @@ def _connect(port: int, db: int) -> redis.StrictRedis: health_check_interval=20, ) + @classmethod + def _confirm_redis_is_listening(cls, timeout: float = 5.0) -> (bool, str): + """ + Polls the redis port to confirm Redis is really listening + :param timeout: how long to keep polling before raising runtime error + """ + start = time.time() + while time.time() - start < timeout: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: + sock.settimeout(0.2) + try: + sock.connect((LOCALHOST, cls.redis_port)) + return True, "" # Redis is up + except (ConnectionRefusedError, OSError): + time.sleep(0.2) + + # If we reach here, port never opened + return False, ( + f"_confirm_redis_is_listening: Redis failed to start on " + f"{cls.redis_port}" + ) + @classmethod def _start_a_redis_server(cls) -> bool: cmd = ( - f"redis-server {cls._conf_file} --port {cls.redis_port} " - f" --daemonize yes" + f"redis-server {cls._conf_file} " + f"--port {cls.redis_port} " + f"--bind {LOCALHOST} " + f"--daemonize yes" ) process = subprocess.Popen( cmd, @@ -366,11 +397,6 @@ def connect_to_redis_server(cls) -> Tuple[bool, str]: except Exception as e: return False, f"database.connect_to_redis_server: {e}" - @classmethod - def close_redis_server(cls, redis_port): - if server_pid := cls.get_redis_server_pid(redis_port): - os.kill(int(server_pid), signal.SIGKILL) - @classmethod def change_redis_limits(cls, client: redis.StrictRedis): """ @@ -404,7 +430,7 @@ def publish(self, channel, msg): self.r.hincrby(self.constants.MSGS_PUBLISHED_AT_RUNTIME, channel, 1) self.r.publish(channel, msg) - def get_msgs_published_in_channel(self, channel: str) -> int: + def get_msgs_published_in_channel(self, channel: str) -> int | None: """returns the number of msgs published in a channel""" return self.r.hget(self.constants.MSGS_PUBLISHED_AT_RUNTIME, channel) @@ -916,7 +942,7 @@ def should_store_resolution( # don't store this as a valid dns resolution if query != "localhost": for answer in answers: - if answer in ("127.0.0.1", "0.0.0.0"): + if answer in (LOCALHOST, "0.0.0.0"): return False return True @@ -1032,20 +1058,6 @@ def set_domain_resolution(self, domain, ips): """ self.r.hset(self.constants.DOMAINS_RESOLVED, domain, json.dumps(ips)) - @staticmethod - def get_redis_server_pid(redis_port): - """ - get the PID of the redis server started on the given redis_port - retrns the pid - """ - cmd = "ps aux | grep redis-server" - cmd_output = os.popen(cmd).read() - for line in cmd_output.splitlines(): - if str(redis_port) in line: - pid = line.split()[1] - return pid - return False - def set_slips_mode(self, slips_mode): """ function to store the current mode (daemonized/interactive) @@ -1139,11 +1151,11 @@ def _determine_gw_mac(self, ip, mac, interface: str): return True return False - def get_ip_of_mac(self, MAC): + def get_ip_of_mac(self, mac_addr: str): """ Returns the IP associated with the given MAC in our database """ - return self.r.hget(self.constants.MAC, MAC) + return self.r.hget(self.constants.MAC, mac_addr) def get_modified_tw(self): """Return all the list of modified tw""" @@ -1156,14 +1168,14 @@ def get_field_separator(self): """Return the field separator""" return self.separator - def 
store_tranco_whitelisted_domain(self, domain): + def store_tranco_whitelisted_domains(self, domains: List[str]): """ - store whitelisted domain from tranco whitelist in the db + store whitelisted domains from tranco whitelist in the db """ # the reason we store tranco whitelisted domains in the cache db # instead of the main db is, we don't want them cleared on every new # instance of slips - self.rcache.sadd(self.constants.TRANCO_WHITELISTED_DOMAINS, domain) + self.rcache.sadd(self.constants.TRANCO_WHITELISTED_DOMAINS, *domains) def is_whitelisted_tranco_domain(self, domain): return self.rcache.sismember( @@ -1250,7 +1262,8 @@ def get_ip_identification( def get_multiaddr(self): """ - this is can only be called when p2p is enabled, this value is set by p2p pigeon + this can only be called when p2p is enabled, + this value is set by p2p pigeon in the db """ return self.r.get(self.constants.MULTICAST_ADDRESS) @@ -1538,81 +1551,147 @@ def get_name_of_module_at(self, given_pid): if int(given_pid) == int(pid): return name - def set_org_info(self, org, org_info, info_type): + def set_org_cidrs(self, org, org_ips: Dict[str, List[str]]): """ - store ASN, IP and domains of an org in the db + stores CIDRs of an org in the db :param org: supported orgs are ('google', 'microsoft', 'apple', 'facebook', 'twitter') - : param org_info: a json serialized list of asns or ips or domains - :param info_type: supported types are 'asn', 'domains', 'IPs' + :param org_ips: A dict with the first octet of a cidr, + and the full cidr as keys. + something like { + '2401': ['2401:fa00::/42', '2401:fa00:4::/48'] + '70': ['70.32.128.0/19','70.32.136.0/24'] + } """ - # info will be stored in OrgInfo key {'facebook_asn': .., - # 'twitter_domains': ...} - self.rcache.hset( - self.constants.ORG_INFO, f"{org}_{info_type}", org_info - ) + key = f"{org}_IPs" + if isinstance(org_ips, dict): + serializable = {str(k): json.dumps(v) for k, v in org_ips.items()} + self.rcache.hset(key, mapping=serializable) - def get_org_info(self, org, info_type) -> str: + def set_org_info(self, org, org_info: List[str], info_type: str): """ - get the ASN, IP and domains of an org from the db + store ASN or domains of an org in the db + :param org: supported orgs are ('google', 'microsoft', + 'apple', 'facebook', 'twitter') + : param org_info: a list of asns or ips or domains + :param info_type: supported types are 'asn' or 'domains' + NOTE: this function doesnt store org IPs, pls use set_org_ips() + instead + """ + # info will be stored in redis SETs like 'facebook_asn', + # 'twitter_ips', etc. 
+ key = f"{org}_{info_type}" + if isinstance(org_info, list): + self.rcache.sadd(key, *org_info) + + def get_org_info(self, org, info_type: str) -> List[str]: + """ + Returns the ASN or domains of an org from the db + :param org: supported orgs are ('google', 'microsoft', 'apple', 'facebook', 'twitter') - :param info_type: supported types are 'asn', 'domains' - returns a json serialized dict with info + :param info_type: supported types are 'asn' or 'domains' + + returns a List[str] of the required info PS: All ASNs returned by this function are uppercase """ - return ( - self.rcache.hget(self.constants.ORG_INFO, f"{org}_{info_type}") - or "[]" - ) + key = f"{org}_{info_type}" + return self.rcache.smembers(key) - def get_org_ips(self, org): - org_info = self.rcache.hget(self.constants.ORG_INFO, f"{org}_IPs") + def is_domain_in_org_domains(self, org: str, domain: str) -> bool: + """ + checks if the given domain is in the org's domains set + :param org: supported orgs are ('google', 'microsoft', 'apple', + 'facebook', 'twitter') + :param domain: domain to check + :return: True if the domain is in the org's domains set, False otherwise + """ + key = f"{org}_domains" + return True if self.rcache.sismember(key, domain) else False - if not org_info: - org_info = {} - return org_info + def is_asn_in_org_asn(self, org: str, asn: str) -> bool: + """ + checks if the given asn is in the org's asns set + :param org: supported orgs are ('google', 'microsoft', 'apple', + 'facebook', 'twitter') + :param asn: asn to check + :return: True if the asn is in the org's asns set, False otherwise + """ + key = f"{org}_asn" + return True if self.rcache.sismember(key, asn) else False - try: - return json.loads(org_info) - except TypeError: - # it's a dict - return org_info + def is_ip_in_org_cidrs( + self, org: str, first_octet: str + ) -> List[str] | None: + """ + checks if the given first octet in the org's octets + :param org: supported orgs are ('google', 'microsoft', 'apple', + 'facebook', 'twitter') + :param ip: ip to check + :return: a list of cidrs the given ip may belong to, None otherwise + """ + key = f"{org}_IPs" + return self.r.hget(key, first_octet) - def set_whitelist(self, type_, whitelist_dict): + def get_org_ips(self, org: str) -> Dict[str, str]: """ - Store the whitelist_dict in the given key - :param type_: supported types are IPs, domains, macs and organizations - :param whitelist_dict: the dict of IPs,macs, domains or orgs to store + returns Dict[str, str] + keys are subnet first octets + values are serialized list of cidrs + e.g { + '2401': ['2401:fa00::/42', '2401:fa00:4::/48'] + '70': ['70.32.128.0/19','70.32.136.0/24'] + } """ - self.r.hset( - self.constants.WHITELIST, type_, json.dumps(whitelist_dict) - ) + key = f"{org}_IPs" + org_info = self.rcache.hgetall(key) + return org_info if org_info else {} - def get_all_whitelist(self) -> Optional[Dict[str, dict]]: + def set_whitelist(self, type_, whitelist_dict: Dict[str, Dict[str, str]]): """ - Returns a dict with the following keys from the whitelist - 'mac', 'organizations', 'IPs', 'domains' + Store the whitelist_dict in the given key + :param type_: supported types are IPs, domains, macs and organizations + :param whitelist_dict: the dict of IPs,macs, domains or orgs to store """ - whitelist: Optional[Dict[str, str]] = self.r.hgetall( - self.constants.WHITELIST - ) - if whitelist: - whitelist = {k: json.loads(v) for k, v in whitelist.items()} - return whitelist + key = f"{self.constants.WHITELIST}_{type_}" + # Pre-serialize all values + 
data = {ioc: json.dumps(info) for ioc, info in whitelist_dict.items()} + # Send all at once + if data: + self.r.hset(key, mapping=data) def get_whitelist(self, key: str) -> dict: """ - Whitelist supports different keys like : IPs domains - and organizations - this function is used to check if we have any of the - above keys whitelisted + Return ALL the whitelisted IoCs of key type + Whitelist supports different keys like : "IPs", "domains", + "organizations" or "macs" """ - if whitelist := self.r.hget(self.constants.WHITELIST, key): - return json.loads(whitelist) + key = f"{self.constants.WHITELIST}_{key}" + if whitelist := self.r.hgetall(key): + return whitelist else: return {} + def is_whitelisted(self, ioc: str, type_: str) -> str | None: + """ + Check if a given ioc (IP, domain, or MAC) is whitelisted. + + :param ioc: The ioc to check; IP address, domain, or MAC + :param type_: The type of ioc to check. Supported types: 'IPs', + 'domains', 'macs'. + :return: a serialized dict with the whitelist info of the given ioc + :raises ValueError: If the provided type_ is not supported. + """ + valid_types = {"IPs", "domains", "macs"} + if type_ not in valid_types: + raise ValueError( + f"Unsupported whitelist type: {type_}. " + f"Must be one of {valid_types}." + ) + + key = f"{self.constants.WHITELIST}_{type_}" + return self.r.hget(key, ioc) + def has_cached_whitelist(self) -> bool: return bool(self.r.exists(self.constants.WHITELIST)) diff --git a/slips_files/core/database/redis_db/ioc_handler.py b/slips_files/core/database/redis_db/ioc_handler.py index f17483f056..32ccac28b3 100644 --- a/slips_files/core/database/redis_db/ioc_handler.py +++ b/slips_files/core/database/redis_db/ioc_handler.py @@ -168,12 +168,12 @@ def set_ti_feed_info(self, file, data): self.rcache.hset(self.constants.TI_FILES_INFO, file, data) def store_known_fp_md5_hashes(self, fps: Dict[str, List[str]]): - self.rcache.hmset(self.constants.KNOWN_FPS, fps) + self.rcache.hmset(self.constants.KNOWN_FP_MD5_HASHES, fps) def is_known_fp_md5_hash(self, hash: str) -> Optional[str]: """returns the description of the given hash if it is a FP. 
and - returns Fals eif the hash is not a FP""" - return self.rcache.hmget(self.constants.KNOWN_FPS, hash) + returns False if the hash is not a FP""" + return self.rcache.hmget(self.constants.KNOWN_FP_MD5_HASHES, hash) def delete_ips_from_ioc_ips(self, ips: List[str]): """ diff --git a/slips_files/core/evidence_handler.py b/slips_files/core/evidence_handler.py index 9ec213dcff..085f6a2646 100644 --- a/slips_files/core/evidence_handler.py +++ b/slips_files/core/evidence_handler.py @@ -21,6 +21,8 @@ # stratosphere@aic.fel.cvut.cz import json +import multiprocessing +import threading from typing import ( List, Dict, @@ -31,7 +33,6 @@ import sys import os import time -import traceback from slips_files.common.idmefv2 import IDMEFv2 from slips_files.common.style import ( @@ -39,6 +40,7 @@ ) from slips_files.common.parsers.config_parser import ConfigParser from slips_files.common.slips_utils import utils +from slips_files.core.evidence_logger import EvidenceLogger from slips_files.core.helpers.whitelist.whitelist import Whitelist from slips_files.core.helpers.notify import Notify from slips_files.common.abstracts.icore import ICore @@ -64,7 +66,7 @@ class EvidenceHandler(ICore): name = "EvidenceHandler" def init(self): - self.whitelist = Whitelist(self.logger, self.db) + self.whitelist = Whitelist(self.logger, self.db, self.bloom_filters) self.idmefv2 = IDMEFv2(self.logger, self.db) self.separator = self.db.get_separator() self.read_configuration() @@ -103,10 +105,25 @@ def init(self): self.our_ips: List[str] = utils.get_own_ips(ret="List") self.formatter = EvidenceFormatter(self.db, self.args) # thats just a tmp value, this variable will be set and used when - # the - # module is stopping. + # the module is stopping. self.last_msg_received_time = time.time() + # A thread that handing I/O to disk (writing evidence to log files) + self.logger_stop_signal = threading.Event() + self.evidence_logger_q = multiprocessing.Queue() + self.evidence_logger = EvidenceLogger( + stop_signal=self.logger_stop_signal, + evidence_logger_q=self.evidence_logger_q, + logfile=self.logfile, + jsonfile=self.jsonfile, + ) + self.logger_thread = threading.Thread( + target=self.evidence_logger.run_logger_thread, + daemon=True, + name="thread_that_handles_evidence_logging_to_disk", + ) + utils.start_thread(self.logger_thread, self.db) + def read_configuration(self): conf = ConfigParser() self.width: float = conf.get_tw_width_as_float() @@ -148,13 +165,11 @@ def add_alert_to_json_log_file(self, alert: Alert): self.handle_unable_to_log(alert, "Can't convert to IDMEF alert") return - try: - json.dump(idmef_alert, self.jsonfile) - self.jsonfile.write("\n") - except KeyboardInterrupt: - return True - except Exception as e: - self.handle_unable_to_log(alert, e) + to_log = { + "to_log": idmef_alert, + "where": "alerts.json", + } + self.evidence_logger_q.put(to_log) def add_evidence_to_json_log_file( self, @@ -186,29 +201,26 @@ def add_evidence_to_json_log_file( ) } ) - json.dump(idmef_evidence, self.jsonfile) - self.jsonfile.write("\n") + + to_log = { + "to_log": idmef_evidence, + "where": "alerts.json", + } + + self.evidence_logger_q.put(to_log) + except KeyboardInterrupt: return True except Exception as e: self.handle_unable_to_log(evidence, e) - def add_to_log_file(self, data): + def add_to_log_file(self, data: str): """ Add a new evidence line to the alerts.log and other log files if logging is enabled. 
""" - try: - # write to alerts.log - self.logfile.write(data) - if not data.endswith("\n"): - self.logfile.write("\n") - self.logfile.flush() - except KeyboardInterrupt: - return True - except Exception: - self.print("Error in add_to_log_file()") - self.print(traceback.format_exc(), 0, 1) + to_log = {"to_log": data, "where": "alerts.log"} + self.evidence_logger_q.put(to_log) def log_alert(self, alert: Alert, blocked=False): """ @@ -239,6 +251,11 @@ def log_alert(self, alert: Alert, blocked=False): self.add_alert_to_json_log_file(alert) def shutdown_gracefully(self): + self.logger_stop_signal.set() + try: + self.logger_thread.join(timeout=5) + except Exception: + pass self.logfile.close() self.jsonfile.close() @@ -363,8 +380,24 @@ def send_to_exporting_module(self, tw_evidence: Dict[str, Evidence]): """ for evidence in tw_evidence.values(): evidence: Evidence - evidence: dict = utils.to_dict(evidence) - self.db.publish("export_evidence", json.dumps(evidence)) + evidence_dict: dict = utils.to_dict(evidence) + self.print( + f"[EvidenceHandler] Exporting evidence {evidence_dict.get('id')} " + f"type={evidence_dict.get('evidence_type')} via export_evidence.", + 2, + 0, + ) + self.db.publish("export_evidence", json.dumps(evidence_dict)) + + def publish_single_evidence(self, evidence: Evidence): + evidence_dict: dict = utils.to_dict(evidence) + self.print( + f"[EvidenceHandler] Export streaming {evidence_dict.get('id')} " + f"type={evidence_dict.get('evidence_type')} via export_evidence.", + 2, + 0, + ) + self.db.publish("export_evidence", json.dumps(evidence_dict)) def is_blocking_modules_supported(self) -> bool: """ @@ -522,7 +555,13 @@ def main(self): if msg := self.get_msg("evidence_added"): msg["data"]: str evidence: dict = json.loads(msg["data"]) - evidence: Evidence = dict_to_evidence(evidence) + try: + evidence: Evidence = dict_to_evidence(evidence) + except Exception as e: + self.print( + f"Problem converting {evidence} to dict: " f"{e}", 0, 1 + ) + continue profileid: str = str(evidence.profile) twid: str = str(evidence.timewindow) evidence_type: EvidenceType = evidence.evidence_type @@ -543,6 +582,9 @@ def main(self): # reaching this point, now remove evidence from db so # it could be completely ignored self.db.delete_evidence(profileid, twid, evidence.id) + self.print( + f"{self.whitelist.get_bloom_filters_stats()}", 2, 0 + ) continue # convert time to local timezone @@ -590,6 +632,9 @@ def main(self): accumulated_threat_level, ) + # stream every evidence toward exporting modules immediately + self.publish_single_evidence(evidence) + evidence_dict: dict = utils.to_dict(evidence) self.db.publish("report_to_peers", json.dumps(evidence_dict)) diff --git a/slips_files/core/evidence_logger.py b/slips_files/core/evidence_logger.py new file mode 100644 index 0000000000..1e8a37e52f --- /dev/null +++ b/slips_files/core/evidence_logger.py @@ -0,0 +1,69 @@ +import json +import queue +import threading +import traceback +from typing import TextIO + + +class EvidenceLogger: + def __init__( + self, + stop_signal: threading.Event, + evidence_logger_q: queue.Queue, + logfile: TextIO, + jsonfile: TextIO, + ): + self.stop_signal = stop_signal + self.evidence_logger_q = evidence_logger_q + self.logfile = logfile + self.jsonfile = jsonfile + + def print_to_alerts_logfile(self, data: str): + """ + Add a new evidence line to the alerts.log and other log files if + logging is enabled. 
+ """ + try: + # write to alerts.log + self.logfile.write(data) + if not data.endswith("\n"): + self.logfile.write("\n") + self.logfile.flush() + except KeyboardInterrupt: + return True + except Exception: + self.print("Error in evidence_logger.print_to_alerts_logfile()") + self.print(traceback.format_exc(), 0, 1) + + def print_to_alerts_json(self, idmef_evidence: dict): + try: + json.dump(idmef_evidence, self.jsonfile) + self.jsonfile.write("\n") + except KeyboardInterrupt: + return + except Exception: + return + + def run_logger_thread(self): + """ + runs forever in a loop reveiving msgs from evidence_handler and + logging them to alert.log or alerts.json + to avoid blocking evidence handler when high traffic attacks are + happening, so slips can process evidence faster there while we log + as fast as possible here + """ + while not self.stop_signal.is_set(): + try: + msg = self.evidence_logger_q.get(timeout=1) + except queue.Empty: + continue + except Exception: + continue + + destination = msg["where"] + + if destination == "alerts.log": + self.print_to_alerts_logfile(msg["to_log"]) + + elif destination == "alerts.json": + self.print_to_alerts_json(msg["to_log"]) diff --git a/slips_files/core/helpers/bloom_filters_manager.py b/slips_files/core/helpers/bloom_filters_manager.py new file mode 100644 index 0000000000..4b93e63997 --- /dev/null +++ b/slips_files/core/helpers/bloom_filters_manager.py @@ -0,0 +1,86 @@ +# SPDX-FileCopyrightText: 2021 Sebastian Garcia +# SPDX-License-Identifier: GPL-2.0-only +from typing import List, Dict + +from pybloom_live import BloomFilter + +from slips_files.common.slips_utils import utils +from slips_files.core.database.database_manager import DBManager +from slips_files.core.output import Output + + +class BFManager: + def __init__( + self, + logger: Output, + output_dir, + redis_port, + conf, + ppid: int, + ): + self.redis_port = redis_port + self.output_dir = output_dir + self.logger = logger + self.conf = conf + # the parent pid of this module, used for strating the db + self.ppid = ppid + self.db = DBManager( + self.logger, self.output_dir, self.redis_port, self.conf, self.ppid + ) + self.org_filters = {} + + def initialize_filter(self): + self._init_whitelisted_iocs_bf() + self._init_whitelisted_orgs_bf() + + def _init_whitelisted_iocs_bf(self): + self.domains = BloomFilter(capacity=10000, error_rate=0.001) + self.ips = BloomFilter(capacity=10000, error_rate=0.001) + self.mac_addrs = BloomFilter(capacity=10000, error_rate=0.001) + self.orgs = BloomFilter(capacity=100, error_rate=0.001) + + for ip in self.db.get_whitelist("IPs"): + self.ips.add(ip) + + for domain in self.db.get_whitelist("domains"): + self.domains.add(domain) + + for org in self.db.get_whitelist("organizations"): + self.orgs.add(org) + + for mac in self.db.get_whitelist("macs"): + self.mac_addrs.add(mac) + + def _init_whitelisted_orgs_bf(self): + """ + Updates the bloom filters with the whitelisted organization + domains, asns, and ips + fills the self.org_filters dict + is called from update_manager whether slips did update its local + org files or not. + this goal of calling this is to make sure slips has the bloom + filters in mem at all times. 
+ """ + err_rate = 0.01 + for org in utils.supported_orgs: + domains_bloom = BloomFilter(capacity=10000, error_rate=err_rate) + asns_bloom = BloomFilter(capacity=10000, error_rate=err_rate) + cidrs_bloom = BloomFilter(capacity=100, error_rate=err_rate) + + domains: List[str] = self.db.get_org_info(org, "domains") + _ = [domains_bloom.add(domain) for domain in domains] + + asns: List[str] = self.db.get_org_info(org, "asn") + _ = [asns_bloom.add(asn) for asn in asns] + + org_subnets: Dict[str, str] = self.db.get_org_ips(org) + _ = [ + cidrs_bloom.add(first_octet) + for first_octet in org_subnets.keys() + ] + + self.org_filters[org] = { + "domains": domains_bloom, + "asns": asns_bloom, + "first_octets": cidrs_bloom, + } diff --git a/slips_files/core/helpers/checker.py b/slips_files/core/helpers/checker.py index be77927341..93d0851e16 100644 --- a/slips_files/core/helpers/checker.py +++ b/slips_files/core/helpers/checker.py @@ -21,6 +21,12 @@ def get_input_type(self) -> tuple: """ # only defined in stdin lines line_type = False + + if self.main.args.interface and self.main.args.growing: + input_information = self.main.args.growing + input_type = self.main.get_input_file_type(input_information) + return input_type, input_information, line_type + # -i or -ap if self.main.args.interface or self.main.args.access_point: input_information = ( @@ -77,7 +83,6 @@ def _check_mutually_exclusive_flags(self): self.main.args.filepath, # -f self.main.args.input_module, # -im ] - # Count how many of the flags are set (True) mutually_exclusive_flag_count = sum( bool(flag) for flag in mutually_exclusive_flags @@ -86,11 +91,29 @@ def _check_mutually_exclusive_flags(self): if mutually_exclusive_flag_count > 1: print( "Only one of the flags -i, -ap, -s, -d, or -f is allowed. " - "Stopping slips." + "Stopping Slips." ) self.main.terminate_slips() return + def _check_if_growing_zeek_dir_is_used_correctly(self): + """it should be used with -i. something like -g -i + """ + if not self.main.args.growing: + return + + usage = "Usage: -g -i ." + if not self.main.args.interface: + print( + f"{usage}\n" + "You need to define an interface with -i. Stopping Slips" + ) + self.main.terminate_slips() + + if self.main.args.filepath: + print(f"{usage}\n" "-f shouldn't be used with -g. Stopping Slips") + self.main.terminate_slips() + def _check_if_root_is_required(self): if (self.main.args.save or self.main.args.db) and os.getuid() != 0: print("Saving and loading the database requires root privileges.") @@ -146,7 +169,6 @@ def _is_slips_running_non_stop(self) -> bool: return ( self.main.args.interface or self.main.args.access_point - or self.main.args.growing or self.main.args.input_module ) @@ -165,6 +187,7 @@ def verify_given_flags(self): self._check_mutually_exclusive_flags() self._check_if_root_is_required() self._check_interface_validity() + self._check_if_growing_zeek_dir_is_used_correctly() if (self.main.args.verbose and int(self.main.args.verbose) > 3) or ( self.main.args.debug and int(self.main.args.debug) > 3 @@ -193,9 +216,7 @@ def verify_given_flags(self): ) return - if self.main.conf.use_global_p2p() and not ( - self.main.args.interface or self.main.args.growing - ): + if self.main.conf.use_global_p2p() and not self.main.args.interface: print( "Warning: Global P2P (Fides Module + Iris Module) is only supported using " "an interface. Global P2P (Fides Module + Iris Module) Disabled." 
diff --git a/slips_files/core/helpers/filemonitor.py b/slips_files/core/helpers/filemonitor.py index c9726a0b9e..3b4130cb6d 100644 --- a/slips_files/core/helpers/filemonitor.py +++ b/slips_files/core/helpers/filemonitor.py @@ -74,5 +74,3 @@ def on_modified(self, event): # tell slips to terminate self.db.publish_stop() break - elif "whitelist" in filename: - self.db.publish("reload_whitelist", "reload") diff --git a/slips_files/core/helpers/whitelist/domain_whitelist.py b/slips_files/core/helpers/whitelist/domain_whitelist.py index 1cd086455a..9a0476cf47 100644 --- a/slips_files/core/helpers/whitelist/domain_whitelist.py +++ b/slips_files/core/helpers/whitelist/domain_whitelist.py @@ -1,5 +1,6 @@ # SPDX-FileCopyrightText: 2021 Sebastian Garcia # SPDX-License-Identifier: GPL-2.0-only +import json from typing import List, Dict import tldextract @@ -20,6 +21,9 @@ def name(self): def init(self): self.ip_analyzer = IPAnalyzer(self.db) self.read_configuration() + # for debugging + self.bf_hits = 0 + self.bf_misses = 0 def read_configuration(self): conf = ConfigParser() @@ -76,7 +80,6 @@ def is_whitelisted( # the reason why this function doesnt support the Attacker or # Victim as a parameter directly is that we may call it on other # values. not just attacker and victim domains. - if not isinstance(domain, str): return False @@ -104,22 +107,29 @@ def is_whitelisted( # domain is in the local whitelist, but the local whitelist # not enabled return False - whitelisted_domains: Dict[str, Dict[str, str]] - whitelisted_domains = self.db.get_whitelist("domains") - if parent_domain in whitelisted_domains: - # did the user say slips should ignore flows or alerts in the - # config file? - whitelist_should_ignore = whitelisted_domains[parent_domain][ - "what_to_ignore" - ] - # did the user say slips should ignore flows/alerts TO or from - # that domain in the config file? - dir_from_whitelist: str = whitelisted_domains[parent_domain][ - "from" - ] - else: + + if parent_domain not in self.manager.bloom_filters.domains: + # definitely not whitelisted + self.bf_hits += 1 return False + domain_info: str | None = self.db.is_whitelisted( + parent_domain, "domains" + ) + if not domain_info: + # bloom filter FP + self.bf_misses += 1 + return False + + self.bf_hits += 1 + domain_info: Dict[str, str] = json.loads(domain_info) + # did the user say slips should ignore flows or alerts in the + # config file? + whitelist_should_ignore = domain_info["what_to_ignore"] + # did the user say slips should ignore flows/alerts TO or from + # that domain in the config file? + dir_from_whitelist: str = domain_info["from"] + # match the direction and whitelist_Type of the given domain to the # ones we have from the whitelist. 
if not self.match.what_to_ignore( diff --git a/slips_files/core/helpers/whitelist/ip_whitelist.py b/slips_files/core/helpers/whitelist/ip_whitelist.py index 22d2dcd46d..1b066d7a32 100644 --- a/slips_files/core/helpers/whitelist/ip_whitelist.py +++ b/slips_files/core/helpers/whitelist/ip_whitelist.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2021 Sebastian Garcia # SPDX-License-Identifier: GPL-2.0-only import ipaddress +import json from typing import List, Dict from slips_files.common.abstracts.iwhitelist_analyzer import IWhitelistAnalyzer @@ -17,6 +18,9 @@ def name(self): def init(self): self.read_configuration() + # for debugging + self.bf_hits = 0 + self.bf_misses = 0 def read_configuration(self): conf = ConfigParser() @@ -52,19 +56,29 @@ def is_whitelisted( if not self.is_valid_ip(ip): return False - whitelisted_ips: Dict[str, dict] = self.db.get_whitelist("IPs") + if ip not in self.manager.bloom_filters.ips: + # defnitely not whitelisted + self.bf_hits += 1 + return False - if ip not in whitelisted_ips: + ip_info: str | None = self.db.is_whitelisted(ip, "IPs") + # reaching here means ip is in the bloom filter + if not ip_info: + # bloom filter FP + self.bf_misses += 1 return False + self.bf_hits += 1 + ip_info: Dict[str, str] = json.loads(ip_info) # Check if we should ignore src or dst alerts from this ip # from_ can be: src, dst, both # what_to_ignore can be: alerts or flows or both - whitelist_direction: str = whitelisted_ips[ip]["from"] + whitelist_direction: str = ip_info["from"] if not self.match.direction(direction, whitelist_direction): return False - ignore: str = whitelisted_ips[ip]["what_to_ignore"] + ignore: str = ip_info["what_to_ignore"] if not self.match.what_to_ignore(what_to_ignore, ignore): return False + return True diff --git a/slips_files/core/helpers/whitelist/mac_whitelist.py b/slips_files/core/helpers/whitelist/mac_whitelist.py index 126c6a2cc5..442116a245 100644 --- a/slips_files/core/helpers/whitelist/mac_whitelist.py +++ b/slips_files/core/helpers/whitelist/mac_whitelist.py @@ -1,5 +1,6 @@ # SPDX-FileCopyrightText: 2021 Sebastian Garcia # SPDX-License-Identifier: GPL-2.0-only +import json from typing import Dict import validators @@ -20,6 +21,9 @@ def name(self): def init(self): self.ip_analyzer = IPAnalyzer(self.db) self.read_configuration() + # for debugging + self.bf_hits = 0 + self.bf_misses = 0 def read_configuration(self): conf = ConfigParser() @@ -65,15 +69,24 @@ def is_whitelisted( if not self.is_valid_mac(mac): return False - whitelisted_macs: Dict[str, dict] = self.db.get_whitelist("macs") - if mac not in whitelisted_macs: + if mac not in self.manager.bloom_filters.mac_addrs: + # defnitely not whitelisted + self.bf_hits += 1 return False - whitelist_direction: str = whitelisted_macs[mac]["from"] + mac_info: str | None = self.db.is_whitelisted(mac, "macs") + if not mac_info: + self.bf_misses += 1 + return False + + self.bf_hits += 1 + + mac_info: Dict[str, dict] = json.loads(mac_info) + whitelist_direction: str = mac_info["from"] if not self.match.direction(direction, whitelist_direction): return False - whitelist_what_to_ignore: str = whitelisted_macs[mac]["what_to_ignore"] + whitelist_what_to_ignore: str = mac_info["what_to_ignore"] if not self.match.what_to_ignore( what_to_ignore, whitelist_what_to_ignore ): diff --git a/slips_files/core/helpers/whitelist/organization_whitelist.py b/slips_files/core/helpers/whitelist/organization_whitelist.py index c2612238d7..093b6bc2a3 100644 --- a/slips_files/core/helpers/whitelist/organization_whitelist.py +++ 
b/slips_files/core/helpers/whitelist/organization_whitelist.py @@ -8,6 +8,8 @@ Union, ) +from pybloom_live import BloomFilter + from slips_files.common.abstracts.iwhitelist_analyzer import IWhitelistAnalyzer from slips_files.common.parsers.config_parser import ConfigParser from slips_files.common.slips_utils import utils @@ -39,19 +41,36 @@ def init(self): self.ip_analyzer = IPAnalyzer(self.db) self.domain_analyzer = DomainAnalyzer(self.db) self.org_info_path = "slips_files/organizations_info/" + self.bloom_filters: Dict[str, Dict[str, BloomFilter]] + self.bloom_filters = self.manager.bloom_filters.org_filters self.read_configuration() + self.whitelisted_orgs: Dict[str, str] = self.db.get_whitelist( + "organizations" + ) + # for debugging + self.bf_hits = 0 + self.bf_misses = 0 def read_configuration(self): conf = ConfigParser() self.enable_local_whitelist: bool = conf.enable_local_whitelist() - def is_domain_in_org(self, domain: str, org: str): + def is_domain_in_org(self, domain: str, org: str) -> bool: """ Checks if the given domains belongs to the given org using the hardcoded org domains in organizations_info/org_domains """ try: - org_domains = json.loads(self.db.get_org_info(org, "domains")) + if domain not in self.bloom_filters[org]["domains"]: + self.bf_hits += 1 + return False + + if self.db.is_domain_in_org_domains(org, domain): + self.bf_hits += 1 + return True + + # match subdomains of all org domains slips knows of + org_domains: List[str] = self.db.get_org_info(org, "domains") flow_tld = self.domain_analyzer.get_tld(domain) for org_domain in org_domains: @@ -60,44 +79,58 @@ def is_domain_in_org(self, domain: str, org: str): if flow_tld != org_domain_tld: continue - # match subdomains too # if org has org.com, and the flow_domain is xyz.org.com # whitelist it if org_domain in domain: + self.bf_hits += 1 return True # if org has xyz.org.com, and the flow_domain is org.com # whitelist it if domain in org_domain: + self.bf_hits += 1 return True + self.bf_misses += 1 + except (KeyError, TypeError): # comes here if the whitelisted org doesn't have domains in # slips/organizations_info (not a famous org) # and ip doesn't have asn info. # so we don't know how to link this ip to the whitelisted org! - return False + pass + return False def is_ip_in_org(self, ip: str, org): """ Check if the given ip belongs to the given org """ try: - org_subnets: dict = self.db.get_org_ips(org) - first_octet: str = utils.get_first_octet(ip) if not first_octet: return - ip_obj = ipaddress.ip_address(ip) - # organization IPs are sorted by first octet for faster search - for range_ in org_subnets.get(first_octet, []): - if ip_obj in ipaddress.ip_network(range_): - return True + + if first_octet not in self.bloom_filters[org]["first_octets"]: + self.bf_hits += 1 + return False + + # organization IPs are sorted in the db by first octet for faster + # search + cidrs: List[str] + if cidrs := self.db.is_ip_in_org_ips(org, first_octet): + ip_obj = ipaddress.ip_address(ip) + for cidr in cidrs: + if ip_obj in ipaddress.ip_network(cidr): + self.bf_hits += 1 + return True + except (KeyError, TypeError): # comes here if the whitelisted org doesn't have # info in slips/organizations_info (not a famous org) # and ip doesn't have asn info. 
pass + + self.bf_misses += 1 return False def is_ip_asn_in_org_asn(self, ip: str, org): @@ -122,13 +155,23 @@ def _is_asn_in_org(self, asn: str, org: str) -> bool: """ if not (asn and asn != "Unknown"): return False + # because all ASN stored in slips organization_info/ are uppercase asn: str = asn.upper() if org.upper() in asn: return True - org_asn: List[str] = json.loads(self.db.get_org_info(org, "asn")) - return asn in org_asn + if asn not in self.bloom_filters[org]["asns"]: + self.bf_hits += 1 + return False + + if self.db.is_asn_in_org_asn(org, asn): + self.bf_hits += 1 + return True + else: + # bloom filter FP + self.bf_misses += 1 + return False def is_whitelisted(self, flow) -> bool: """checks if the given -flow- is whitelisted. not evidence/alerts.""" @@ -190,23 +233,21 @@ def _is_part_of_a_whitelisted_org( :param direction: direction of the given ioc, src or dst? :param what_to_ignore: can be "flows" or "alerts" or "both" """ - if ioc_type == IoCType.IP: if utils.is_private_ip(ioc): return False - whitelisted_orgs: Dict[str, dict] = self.db.get_whitelist( - "organizations" - ) - if not whitelisted_orgs: + if not self.whitelisted_orgs: return False - for org in whitelisted_orgs: - dir_from_whitelist = whitelisted_orgs[org]["from"] + for org in self.whitelisted_orgs: + org_info = json.loads(self.whitelisted_orgs[org]) + + dir_from_whitelist = org_info["from"] if not self.match.direction(direction, dir_from_whitelist): continue - whitelist_what_to_ignore = whitelisted_orgs[org]["what_to_ignore"] + whitelist_what_to_ignore = org_info["what_to_ignore"] if not self.match.what_to_ignore( what_to_ignore, whitelist_what_to_ignore ): diff --git a/slips_files/core/helpers/whitelist/whitelist.py b/slips_files/core/helpers/whitelist/whitelist.py index b0e944cdd1..7d80c8b6cf 100644 --- a/slips_files/core/helpers/whitelist/whitelist.py +++ b/slips_files/core/helpers/whitelist/whitelist.py @@ -1,7 +1,6 @@ # SPDX-FileCopyrightText: 2021 Sebastian Garcia # SPDX-License-Identifier: GPL-2.0-only from typing import ( - Optional, Dict, List, Union, @@ -10,6 +9,7 @@ from slips_files.common.parsers.config_parser import ConfigParser from slips_files.common.printer import Printer +from slips_files.core.helpers.bloom_filters_manager import BFManager from slips_files.core.helpers.whitelist.domain_whitelist import DomainAnalyzer from slips_files.core.helpers.whitelist.ip_whitelist import IPAnalyzer from slips_files.core.helpers.whitelist.mac_whitelist import MACAnalyzer @@ -31,10 +31,11 @@ class Whitelist: name = "Whitelist" - def __init__(self, logger: Output, db): + def __init__(self, logger: Output, db, bloom_filter_manager: BFManager): self.printer = Printer(logger, self.name) self.name = "whitelist" self.db = db + self.bloom_filters: BFManager = bloom_filter_manager self.match = WhitelistMatcher() self.parser = WhitelistParser(self.db, self) self.ip_analyzer = IPAnalyzer(self.db, whitelist_manager=self) @@ -50,7 +51,7 @@ def read_configuration(self): def update(self): """ parses the local whitelist specified in the slips.yaml - and stores the parsed results in the db + and stores the parsed results in the db and in bloom filters """ self.parser.parse() self.db.set_whitelist("IPs", self.parser.whitelisted_ips) @@ -140,33 +141,6 @@ def is_whitelisted_flow(self, flow) -> bool: return self.org_analyzer.is_whitelisted(flow) - def get_all_whitelist(self) -> Optional[Dict[str, dict]]: - """ - returns the whitelisted ips, domains, org from the db - returns a dict with the following keys - 'mac', 
'organizations', 'IPs', 'domains' - this function tries to get the whitelist from the db 10 times - """ - whitelist: Dict[str, dict] = self.db.get_all_whitelist() - max_tries = 10 - # if this module is loaded before profilerProcess or before we're - # done processing the whitelist in general - # the database won't return the whitelist - # so we need to try several times until the db returns the - # populated whitelist - # empty dicts evaluate to False - while not bool(whitelist) and max_tries != 0: - # try max 10 times to get the whitelist, if it's still empty - # hen it's not empty by mistake - max_tries -= 1 - whitelist = self.db.get_all_whitelist() - - if max_tries == 0: - # we tried 10 times to get the whitelist, it's probably empty. - return - - return whitelist - def is_whitelisted_evidence(self, evidence: Evidence) -> bool: """ Checks if an evidence is whitelisted @@ -243,3 +217,25 @@ def _is_whitelisted_entity( return True return False + + def get_bloom_filters_stats(self) -> Dict[str, float]: + """ + returns the bloom filters stats + """ + total_hits = 0 + total_misses = 0 + + for helper in ( + self.ip_analyzer, + self.domain_analyzer, + self.mac_analyzer, + self.org_analyzer, + ): + total_hits += helper.bf_hits + total_misses += helper.bf_misses + + # Bloom filters cannot produce false negatives:D + return ( + f"Number of times bloom filter was acuurate (TN + TP):" + f" {total_hits}, FPs: {total_misses}" + ) diff --git a/slips_files/core/helpers/whitelist/whitelist_parser.py b/slips_files/core/helpers/whitelist/whitelist_parser.py index bccfc53be1..ea94d38912 100644 --- a/slips_files/core/helpers/whitelist/whitelist_parser.py +++ b/slips_files/core/helpers/whitelist/whitelist_parser.py @@ -1,7 +1,6 @@ # SPDX-FileCopyrightText: 2021 Sebastian Garcia # SPDX-License-Identifier: GPL-2.0-only import ipaddress -import json import os from typing import TextIO, List, Dict, Optional import validators @@ -18,27 +17,11 @@ def __init__(self, db, manager): # to have access to the print function self.manager = manager self.read_configuration() - self.init_whitelists() - self.org_info_path = "slips_files/organizations_info/" - - def init_whitelists(self): - """ - initializes the dicts we'll be using for storing the parsed - whitelists. - uses existing dicts from the db if found. 
- """ self.whitelisted_ips = {} self.whitelisted_domains = {} self.whitelisted_orgs = {} self.whitelisted_mac = {} - if self.db.has_cached_whitelist(): - # since this parser can run when the user modifies whitelist.conf - # and not just when the user starts slips - # we need to check if the dicts are already there in the cache db - self.whitelisted_ips = self.db.get_whitelist("IPs") - self.whitelisted_domains = self.db.get_whitelist("domains") - self.whitelisted_orgs = self.db.get_whitelist("organizations") - self.whitelisted_mac = self.db.get_whitelist("mac") + self.org_info_path = "slips_files/organizations_info/" def get_dict_for_storing_data(self, data_type: str): """ @@ -93,9 +76,6 @@ def remove_entry_from_cache_db( cache.pop(entry_to_remove["data"]) return True - def set_number_of_columns(self, line: str) -> None: - self.NUMBER_OF_WHITELIST_COLUMNS: int = len(line.split(",")) - def update_whitelisted_domains(self, domain: str, info: Dict[str, str]): if not utils.is_valid_domain(domain): return @@ -127,7 +107,7 @@ def update_whitelisted_mac_addresses(self, mac: str, info: Dict[str, str]): self.whitelisted_mac[mac] = info def update_whitelisted_ips(self, ip: str, info: Dict[str, str]): - if not (validators.ipv6(ip) or validators.ipv4): + if not (validators.ipv6(ip) or validators.ipv4(ip)): return self.whitelisted_ips[ip] = info @@ -150,7 +130,7 @@ def parse_line(self, line: str) -> Dict[str, str]: def call_handler(self, parsed_line: Dict[str, str]): """ calls the appropriate handler based on the type of data in the - parsed line + given line :param parsed_line: output dict of self.parse_line should have the following keys { type": .. @@ -196,7 +176,7 @@ def load_org_asn(self, org) -> Optional[List[str]]: line = line.replace("\n", "").strip() org_asn.append(line.upper()) org_asn_file.close() - self.db.set_org_info(org, json.dumps(org_asn), "asn") + self.db.set_org_info(org, org_asn, "asn") return org_asn def load_org_domains(self, org): @@ -220,7 +200,7 @@ def load_org_domains(self, org): domains.append(line.lower()) domain_info.close() - self.db.set_org_info(org, json.dumps(domains), "domains") + self.db.set_org_info(org, domains, "domains") return domains def is_valid_network(self, network: str) -> bool: @@ -268,7 +248,8 @@ def load_org_ips(self, org) -> Optional[Dict[str, List[str]]]: org_subnets[first_octet] = [line] org_info.close() - self.db.set_org_info(org, json.dumps(org_subnets), "IPs") + + self.db.set_org_cidrs(org, org_subnets) return org_subnets def parse(self) -> bool: @@ -282,7 +263,6 @@ def parse(self) -> bool: while line := whitelist.readline(): line_number += 1 if line.startswith('"IoCType"'): - self.set_number_of_columns(line) continue if line.startswith(";"): @@ -304,10 +284,11 @@ def parse(self) -> bool: except Exception: self.manager.print( f"Line {line_number} in whitelist.conf is invalid." - f" Skipping. " + f" Skipping." 
) continue self.call_handler(parsed_line) + whitelist.close() return True diff --git a/slips_files/core/input.py b/slips_files/core/input.py index 2032e90c6b..60756ff9c9 100644 --- a/slips_files/core/input.py +++ b/slips_files/core/input.py @@ -431,9 +431,11 @@ def get_flows_number(self, file: str) -> int: def read_zeek_folder(self): """ - This is the case that a folder full of zeek files is passed with -f - DISCLAIMER: this func does not run when slips is running on an - interface with -i or -ap + This function runs when + - a finite zeek dir is given to slips with -f + - a growing zeek dir is given to slips with -g + This func does not run when slips is running on an interface with + -i or -ap """ # wait max 10 seconds before stopping slips if no new flows are read self.bro_timeout = 10 @@ -445,10 +447,11 @@ def read_zeek_folder(self): self.bro_timeout = float("inf") self.zeek_dir = self.given_path + # if slips is just reading a finite zeek dir, there's no way to + # know the interface + interface = "default" if self.args.growing: - interface = utils.infer_used_interface() - else: - interface = "default" + interface = self.args.interface self.start_observer(self.zeek_dir, interface) # if 1 file is zeek tabs the rest should be the same diff --git a/slips_files/core/profiler.py b/slips_files/core/profiler.py index 42f1fd006d..599cb90f98 100644 --- a/slips_files/core/profiler.py +++ b/slips_files/core/profiler.py @@ -89,16 +89,13 @@ def init( self.input_type = False self.rec_lines = 0 self.localnet_cache = {} - self.whitelist = Whitelist(self.logger, self.db) + self.whitelist = Whitelist(self.logger, self.db, self.bloom_filters) self.read_configuration() self.symbol = SymbolHandler(self.logger, self.db) # there has to be a timeout or it will wait forever and never # receive a new line self.timeout = 0.0000001 - self.c1 = self.db.subscribe("reload_whitelist") - self.channels = { - "reload_whitelist": self.c1, - } + self.channels = {} # is set by this proc to tell input proc that we are done # processing and it can exit no issue self.is_profiler_done_event = is_profiler_done_event @@ -120,6 +117,7 @@ def init( self.pending_flows_queue_lock = threading.Lock() # flag to know which flow is the start of the pcap/file self.first_flow = True + self.handle_setting_local_net_lock = threading.Lock() def read_configuration(self): conf = ConfigParser() @@ -287,6 +285,7 @@ def add_flow_to_profile(self, flow): # Check if the flow is whitelisted and we should not process it if self.whitelist.is_whitelisted_flow(flow): + self.print(f"{self.whitelist.get_bloom_filters_stats()}", 2, 0) return True # 5th. Store the data according to the paremeters @@ -442,6 +441,7 @@ def should_set_localnet(self, flow) -> bool: if flow.interface in self.localnet_cache: return False else: + # running on a file, impossible to get the interface if "default" in self.localnet_cache: return False @@ -588,20 +588,24 @@ def handle_setting_local_net(self, flow): stores the local network if possible sets the self.localnet_cache dict """ - if not self.should_set_localnet(flow): - return + # to avoid running this func from the 3 profiler threads at the + # same time. 
+ with self.handle_setting_local_net_lock: + if not self.should_set_localnet(flow): + return - if self.db.is_running_non_stop(): - self.localnet_cache = self.get_localnet_of_given_interface() - else: - self.localnet_cache = self.get_local_net_of_flow(flow) + if self.db.is_running_non_stop(): + self.localnet_cache = self.get_localnet_of_given_interface() + else: + self.localnet_cache = self.get_local_net_of_flow(flow) + + for interface, local_net in self.localnet_cache.items(): + self.db.set_local_network(local_net, interface) - for interface, local_net in self.localnet_cache.items(): - self.db.set_local_network(local_net, interface) - to_print = f"Used local network: {green(local_net)}" - if interface != "default": - to_print += f" for interface {green(interface)}." - self.print(to_print) + to_print = f"Used local network: {green(local_net)}" + if interface != "default": + to_print += f" for interface {green(interface)}." + self.print(to_print) def get_msg_from_input_proc( self, q: multiprocessing.Queue, thread_safe=False @@ -627,8 +631,7 @@ def start_profiler_threads(self): """starts 3 profiler threads for faster processing of the flows""" num_of_profiler_threads = 3 for _ in range(num_of_profiler_threads): - t = threading.Thread(target=self.process_flow) - t.daemon = True + t = threading.Thread(target=self.process_flow, daemon=True) t.start() self.profiler_threads.append(t) @@ -665,7 +668,6 @@ def process_flow(self): This function runs in 3 parallel threads for faster processing of the flows """ - while not self.stop_profiler_thread(): msg = self.get_msg_from_input_proc( self.flows_to_process_q, thread_safe=True @@ -741,16 +743,6 @@ def main(self): # we're using self.should_stop() here instead of while True to be # able to unit test this function:D while not self.should_stop(): - # listen on this channel in case whitelist.conf is changed, - # we need to process the new changes - if self.get_msg("reload_whitelist"): - # if whitelist.conf is edited using pycharm - # a msg will be sent to this channel on every keypress, - # because pycharm saves file automatically - # otherwise this channel will get a msg only when - # whitelist.conf is modified and saved to disk - self.whitelist.update() - msg = self.get_msg_from_input_proc(self.profiler_queue) if not msg: # wait for msgs diff --git a/tests/integration_tests/fides_config.yaml b/tests/integration_tests/fides_config.yaml index 55b7a9c060..52d748df29 100644 --- a/tests/integration_tests/fides_config.yaml +++ b/tests/integration_tests/fides_config.yaml @@ -305,11 +305,11 @@ exporting_alerts: # you can set the port to 443 or 80. port : 1234 use_https : False - discovery_path : /services/discovery-a - inbox_path : /services/inbox-a + # TAXII 2 discovery endpoint (relative path or full URL) + discovery_path : /taxii2/ - # Collection on the server you want to push stix data to - collection_name : collection-a + # Collection (ID or title) on the server you want to push STIX data to + collection_name : Alerts # This value is only used when slips is running non-stop (e.g with -i ) # push_delay is the time to wait before pushing STIX data to server (in seconds) @@ -322,11 +322,6 @@ exporting_alerts: taxii_username : admin taxii_password : admin - # URL used to obtain JWT token. set this to '' if you don't want to use it - # is required for JWT based authentication. 
(JWT based authentication is Optional) - # It's usually /management/auth - jwt_auth_path : /management/auth - ############################# CESNET: @@ -429,4 +424,3 @@ local_p2p: # create p2p.log with additional info about peer communications? create_p2p_logfile : False use_p2p : False - diff --git a/tests/integration_tests/test_fides.py b/tests/integration_tests/test_fides.py index 290f195344..b3f5f20149 100644 --- a/tests/integration_tests/test_fides.py +++ b/tests/integration_tests/test_fides.py @@ -153,10 +153,12 @@ def test_conf_file2(path, output_dir, redis_port): "./slips.py", "-t", "-g", + str(path), + # dummy interface required by -g + "-i", + "eth0", "-e", "1", - "-f", - str(path), "-o", str(output_dir), "-c", @@ -242,10 +244,12 @@ def test_trust_recommendation_response(path, output_dir, redis_port): "./slips.py", "-t", "-g", + str(path), + # dummy interface required by -g + "-i", + "eth0", "-e", "1", - "-f", - str(path), "-o", str(output_dir), "-c", diff --git a/tests/integration_tests/test_iris.py b/tests/integration_tests/test_iris.py index 0e13053cdb..6eadfb44d9 100644 --- a/tests/integration_tests/test_iris.py +++ b/tests/integration_tests/test_iris.py @@ -175,10 +175,12 @@ def test_messaging( "./slips.py", "-t", "-g", + str(zeek_dir_path), + # dummy interface required by -g + "-i", + "eth0", "-e", "1", - "-f", - str(zeek_dir_path), "-o", str(output_dir), "-c", @@ -225,7 +227,7 @@ def test_messaging( "PeerDiscovery": { "ListOfMultiAddresses": [original_conn_string] }, - "Identity": {"KeyFile": "second.priv"} + "Identity": {"KeyFile": "second.priv"}, }, ) # generate a second command for the second peer @@ -233,10 +235,12 @@ def test_messaging( "./slips.py", "-t", "-g", + str(zeek_dir_path), + # dummy interface required by -g + "-i", + "eth0", "-e", "1", - "-f", - str(zeek_dir_path), "-o", str(output_dir_peer), "-c", @@ -304,6 +308,6 @@ def test_messaging( "Redis": {"Port": 6644}, "Server": {"Port": 9010}, "PeerDiscovery": {}, - "Identity": {"KeyFile": "private.key"} + "Identity": {"KeyFile": "private.key"}, }, ) diff --git a/tests/module_factory.py b/tests/module_factory.py index a50c1d3df0..4497d9ed1b 100644 --- a/tests/module_factory.py +++ b/tests/module_factory.py @@ -189,45 +189,44 @@ def create_main_obj(self): @patch(MODULE_DB_MANAGER, name="mock_db") def create_http_analyzer_obj(self, mock_db): http_analyzer = HTTPAnalyzer( - self.logger, - "dummy_output_dir", - 6379, - Mock(), # termination event - Mock(), # args - Mock(), # conf - Mock(), # ppid + logger=self.logger, + output_dir="dummy_output_dir", + redis_port=6379, + termination_event=Mock(), + slips_args=Mock(), + conf=Mock(), + ppid=Mock(), + bloom_filters_manager=Mock(), ) - - # override the self.print function to avoid broken pipes http_analyzer.print = Mock() return http_analyzer @patch(MODULE_DB_MANAGER, name="mock_db") def create_fidesModule_obj(self, mock_db): fm = FidesModule( - self.logger, - "dummy_output_dir", - 6379, - Mock(), # termination event - Mock(), # args - Mock(), # conf - Mock(), # ppid + logger=self.logger, + output_dir="dummy_output_dir", + redis_port=6379, + termination_event=Mock(), + slips_args=Mock(), + conf=Mock(), + ppid=Mock(), + bloom_filters_manager=Mock(), ) - - # override the self.print function fm.print = Mock() return fm @patch(MODULE_DB_MANAGER, name="mock_db") def create_virustotal_obj(self, mock_db): virustotal = VT( - self.logger, - "dummy_output_dir", - 6379, - Mock(), # termination event - Mock(), # args - Mock(), # conf - Mock(), # ppid + logger=self.logger, + 
output_dir="dummy_output_dir", + redis_port=6379, + termination_event=Mock(), + slips_args=Mock(), + conf=Mock(), + ppid=Mock(), + bloom_filters_manager=Mock(), ) virustotal.print = Mock() virustotal.__read_configuration = Mock() @@ -239,18 +238,111 @@ def create_arp_obj(self, mock_db): "modules.arp.arp.ARP.wait_for_arp_scans", return_value=Mock() ): arp = ARP( - self.logger, - "dummy_output_dir", - 6379, - Mock(), # termination event - Mock(), # args - Mock(), # conf - Mock(), # ppid + logger=self.logger, + output_dir="dummy_output_dir", + redis_port=6379, + termination_event=Mock(), + slips_args=Mock(), + conf=Mock(), + ppid=Mock(), + bloom_filters_manager=Mock(), ) arp.print = Mock() arp.evidence_filter.is_slips_peer = Mock(return_value=False) return arp + def create_checker_obj(self): + mock_main = Mock() + mock_main.args = MagicMock() + mock_main.args.output = "test_output" + mock_main.args.verbose = "0" + mock_main.args.debug = "0" + mock_main.redis_man = Mock() + mock_main.terminate_slips = Mock() + mock_main.print_version = Mock() + mock_main.get_input_file_type = Mock() + mock_main.handle_flows_from_stdin = Mock() + mock_main.pid = 12345 + + checker = Checker(mock_main) + return checker + + @patch(MODULE_DB_MANAGER, name="mock_db") + def create_go_director_obj(self, mock_db): + with patch("modules.p2ptrust.utils.utils.send_evaluation_to_go"): + go_director = GoDirector( + logger=self.logger, + trustdb=Mock(spec=TrustDB), + db=mock_db, + storage_name="test_storage", + override_p2p=False, + gopy_channel="test_gopy", + pygo_channel="test_pygo", + p2p_reports_logfile="test_reports.log", + ) + go_director.print = Mock() + return go_director + + @patch(DB_MANAGER, name="mock_db") + def create_daemon_object(self, mock_db): + with ( + patch("slips.daemon.Daemon.read_pidfile", return_type=None), + patch("slips.daemon.Daemon.read_configuration"), + patch("builtins.open", mock_open(read_data=None)), + ): + daemon = Daemon(MagicMock()) + daemon.stderr = "errors.log" + daemon.stdout = "slips.log" + daemon.stdin = "/dev/null" + daemon.logsfile = "slips.log" + daemon.pidfile_dir = "/tmp" + daemon.pidfile = os.path.join(daemon.pidfile_dir, "slips_daemon.lock") + daemon.daemon_start_lock = "slips_daemon_start" + daemon.daemon_stop_lock = "slips_daemon_stop" + return daemon + + @contextmanager + def dummy_acquire_flock(self): + yield + + @patch("sqlite3.connect") + def create_trust_db_obj(self, sqlite_mock): + with ( + patch("slips_files.common.abstracts.isqlite.ISQLite._init_flock"), + patch( + "slips_files.common.abstracts.isqlite.ISQLite._acquire_flock" + ), + ): + trust_db = TrustDB( + logger=self.logger, + db_file=Mock(), + main_pid=Mock(), + drop_tables_on_startup=False, + ) + trust_db.conn = Mock() + trust_db.print = Mock() + trust_db._init_flock = Mock() + trust_db._acquire_flock = MagicMock() + return trust_db + + @patch(MODULE_DB_MANAGER, name="mock_db") + def create_base_model_obj(self, mock_db): + logger = Mock(spec=Output) + trustdb = Mock() + return BaseModel(logger, trustdb, mock_db) + + def create_notify_obj(self): + notify = Notify() + return notify + + def create_ioc_handler_obj(self): + handler = IoCHandler() + handler.r = Mock() + handler.rcache = Mock() + handler.constants = Constants() + handler.channels = Channels() + return handler + @patch(MODULE_DB_MANAGER, name="mock_db") def create_arp_filter_obj(self, mock_db): filter = ARPEvidenceFilter(Mock(), Mock(), mock_db) # conf # args @@ -259,13 +351,14 @@ def create_arp_filter_obj(self, mock_db): @patch(MODULE_DB_MANAGER, 
name="mock_db") def create_blocking_obj(self, mock_db): blocking = Blocking( - self.logger, - "dummy_output_dir", - 6379, - Mock(), # termination event - Mock(), # args - Mock(), # conf - Mock(), # ppid + logger=self.logger, + output_dir="dummy_output_dir", + redis_port=6379, + termination_event=Mock(), + slips_args=Mock(), + conf=Mock(), + ppid=Mock(), + bloom_filters_manager=Mock(), ) # override the print function to avoid broken pipes blocking.print = Mock() @@ -295,9 +388,8 @@ def create_flowalerts_obj(self, mock_db): slips_args=Mock(), conf=Mock(), ppid=Mock(), + bloom_filters_manager=Mock(), ) - - # override the self.print function to avoid broken pipes flowalerts.print = Mock() return flowalerts @@ -346,6 +438,21 @@ def create_software_analyzer_obj(self, mock_db): flowalerts = self.create_flowalerts_obj() return Software(flowalerts.db, flowalerts=flowalerts) + @patch(MODULE_DB_MANAGER, name="mock_db") + def create_ip_info_obj(self, mock_db): + ip_info = IPInfo( + logger=self.logger, + output_dir="dummy_output_dir", + redis_port=6379, + termination_event=Mock(), + slips_args=Mock(), + conf=Mock(), + ppid=Mock(), + bloom_filters_manager=Mock(), + ) + ip_info.print = Mock() + return ip_info + @patch(MODULE_DB_MANAGER, name="mock_db") def create_input_obj( self, input_information, input_type, mock_db, line_type=False @@ -359,6 +466,7 @@ def create_input_obj( slips_args=Mock(), conf=Mock(), ppid=Mock(), + bloom_filters_manager=Mock(), is_input_done=Mock(), profiler_queue=self.profiler_queue, input_type=input_type, @@ -379,40 +487,24 @@ def create_input_obj( return input - @patch(MODULE_DB_MANAGER, name="mock_db") - def create_ip_info_obj(self, mock_db): - ip_info = IPInfo( - self.logger, - "dummy_output_dir", - 6379, - Mock(), # termination event - Mock(), # args - Mock(), # conf - Mock(), # ppid - ) - # override the self.print function to avoid broken pipes - ip_info.print = Mock() - return ip_info - @patch(DB_MANAGER, name="mock_db") def create_asn_obj(self, mock_db): return ASN(mock_db) @patch(MODULE_DB_MANAGER, name="mock_db") def create_leak_detector_obj(self, mock_db): - # this file will be used for storing the module output - # and deleted when the tests are done test_pcap = "dataset/test7-malicious.pcap" yara_rules_path = "tests/yara_rules_for_testing/rules/" compiled_yara_rules_path = "tests/yara_rules_for_testing/compiled/" leak_detector = LeakDetector( - self.logger, - "dummy_output_dir", - 6379, - Mock(), # termination event - Mock(), # args - Mock(), # conf - Mock(), # ppid + logger=self.logger, + output_dir="dummy_output_dir", + redis_port=6379, + termination_event=Mock(), + slips_args=Mock(), + conf=Mock(), + ppid=Mock(), + bloom_filters_manager=Mock(), ) leak_detector.print = Mock() # this is the path containing 1 yara rule for testing, @@ -432,11 +524,11 @@ def create_profiler_obj(self, mock_db): slips_args=Mock(), conf=Mock(), ppid=Mock(), + bloom_filters_manager=Mock(), is_profiler_done=Mock(), profiler_queue=self.input_queue, is_profiler_done_event=Mock(), ) - # override the self.print function to avoid broken pipes profiler.print = Mock() profiler.local_whitelist_path = "tests/test_whitelist.conf" profiler.db = mock_db @@ -469,16 +561,15 @@ def create_utils_obj(self): @patch(MODULE_DB_MANAGER, name="mock_db") def create_threatintel_obj(self, mock_db): threatintel = ThreatIntel( - self.logger, - "dummy_output_dir", - 6379, - Mock(), # termination event - Mock(), # args - Mock(), # conf - Mock(), # ppid + logger=self.logger, + output_dir="dummy_output_dir", + 
redis_port=6379, + termination_event=Mock(), + slips_args=Mock(), + conf=Mock(), + ppid=Mock(), + bloom_filters_manager=Mock(), ) - - # override the self.print function to avoid broken pipes threatintel.print = Mock() return threatintel @@ -489,21 +580,26 @@ def create_spamhaus_obj(self, mock_db): @patch(MODULE_DB_MANAGER, name="mock_db") def create_update_manager_obj(self, mock_db): update_manager = UpdateManager( - self.logger, - "dummy_output_dir", - 6379, - Mock(), # termination event - Mock(), # args - Mock(), # conf - Mock(), # ppid + logger=self.logger, + output_dir="dummy_output_dir", + redis_port=6379, + termination_event=Mock(), + slips_args=Mock(), + conf=Mock(), + ppid=Mock(), + bloom_filters_manager=Mock(), ) - # override the self.print function to avoid broken pipes update_manager.print = Mock() return update_manager @patch(MODULE_DB_MANAGER, name="mock_db") def create_whitelist_obj(self, mock_db): - whitelist = Whitelist(self.logger, mock_db) + bloom_filter_manager_mock = Mock() + whitelist = Whitelist( + self.logger, + mock_db, + bloom_filter_manager=bloom_filter_manager_mock, + ) # override the self.print function to avoid broken pipes whitelist.print = Mock() whitelist.whitelist_path = "tests/test_whitelist.conf" @@ -607,122 +703,47 @@ def create_evidence_obj( @patch(MODULE_DB_MANAGER, name="mock_db") def create_network_discovery_obj(self, mock_db): network_discovery = NetworkDiscovery( - self.logger, - "dummy_output_dir", - 6379, - Mock(), # termination event - Mock(), # args - Mock(), # conf - Mock(), # ppid + logger=self.logger, + output_dir="dummy_output_dir", + redis_port=6379, + termination_event=Mock(), + slips_args=Mock(), + conf=Mock(), + ppid=Mock(), + bloom_filters_manager=Mock(), ) return network_discovery + def create_markov_chain_obj(self): + return Matrix() + @patch(MODULE_DB_MANAGER, name="mock_db") def create_arp_poisoner_obj(self, mock_db): poisoner = ARPPoisoner( - self.logger, - "dummy_output_dir", - 6379, - Mock(), # termination event - Mock(), # args - Mock(), # conf - Mock(), # ppid + logger=self.logger, + output_dir="dummy_output_dir", + redis_port=6379, + termination_event=Mock(), + slips_args=Mock(), + conf=Mock(), + ppid=Mock(), + bloom_filters_manager=Mock(), ) return poisoner - def create_markov_chain_obj(self): - return Matrix() - - def create_checker_obj(self): - mock_main = Mock() - mock_main.args = MagicMock() - mock_main.args.output = "test_output" - mock_main.args.verbose = "0" - mock_main.args.debug = "0" - mock_main.redis_man = Mock() - mock_main.terminate_slips = Mock() - mock_main.print_version = Mock() - mock_main.get_input_file_type = Mock() - mock_main.handle_flows_from_stdin = Mock() - mock_main.pid = 12345 - - checker = Checker(mock_main) - return checker - - @patch(MODULE_DB_MANAGER, name="mock_db") - def create_go_director_obj(self, mock_db): - with patch("modules.p2ptrust.utils.utils.send_evaluation_to_go"): - go_director = GoDirector( - logger=self.logger, - trustdb=Mock(spec=TrustDB), - db=mock_db, - storage_name="test_storage", - override_p2p=False, - gopy_channel="test_gopy", - pygo_channel="test_pygo", - p2p_reports_logfile="test_reports.log", - ) - go_director.print = Mock() - return go_director - - @patch(DB_MANAGER, name="mock_db") - def create_daemon_object(self, mock_db): - with ( - patch("slips.daemon.Daemon.read_pidfile", return_type=None), - patch("slips.daemon.Daemon.read_configuration"), - patch("builtins.open", mock_open(read_data=None)), - ): - daemon = Daemon(MagicMock()) - daemon.stderr = "errors.log" - 
daemon.stdout = "slips.log" - daemon.stdin = "/dev/null" - daemon.logsfile = "slips.log" - daemon.pidfile_dir = "/tmp" - daemon.pidfile = os.path.join(daemon.pidfile_dir, "slips_daemon.lock") - daemon.daemon_start_lock = "slips_daemon_start" - daemon.daemon_stop_lock = "slips_daemon_stop" - return daemon - - @contextmanager - def dummy_acquire_flock(self): - yield - - @patch("sqlite3.connect") - def create_trust_db_obj(self, sqlite_mock): - with ( - patch("slips_files.common.abstracts.isqlite.ISQLite._init_flock"), - patch( - "slips_files.common.abstracts.isqlite.ISQLite._acquire_flock" - ), - ): - trust_db = TrustDB( - logger=self.logger, - db_file=Mock(), - main_pid=Mock(), - drop_tables_on_startup=False, - ) - trust_db.conn = Mock() - trust_db.print = Mock() - trust_db._init_flock = Mock() - trust_db._acquire_flock = MagicMock() - return trust_db - @patch(MODULE_DB_MANAGER, name="mock_db") - def create_base_model_obj(self, mock_db): - logger = Mock(spec=Output) - trustdb = Mock() - return BaseModel(logger, trustdb, mock_db) - - def create_notify_obj(self): - notify = Notify() - return notify - - def create_ioc_handler_obj(self): - handler = IoCHandler() - handler.r = Mock() - handler.rcache = Mock() - handler.constants = Constants() - handler.channels = Channels() + def create_evidence_handler_obj(self, mock_db): + handler = EvidenceHandler( + logger=Mock(), + output_dir="/tmp", + redis_port=6379, + termination_event=Mock(), + slips_args=Mock(), + conf=Mock(), + ppid=Mock(), + bloom_filters_manager=Mock(), + ) + handler.db = mock_db return handler @patch(MODULE_DB_MANAGER, name="mock_db") @@ -737,6 +758,7 @@ def create_cesnet_obj(self, mock_db): Mock(), # args Mock(), # conf Mock(), # ppid + Mock(), # Bloom filter manager ) cesnet.db = mock_db cesnet.wclient = MagicMock() @@ -747,20 +769,6 @@ def create_cesnet_obj(self, mock_db): cesnet.print = MagicMock() return cesnet - @patch(MODULE_DB_MANAGER, name="mock_db") - def create_evidence_handler_obj(self, mock_db): - handler = EvidenceHandler( - logger=Mock(), - output_dir="/tmp", - redis_port=6379, - termination_event=Mock(), - slips_args=Mock(), - conf=Mock(), - ppid=Mock(), - ) - handler.db = mock_db - return handler - @patch(MODULE_DB_MANAGER, name="mock_db") def create_evidence_formatter_obj(self, mock_db): args = Mock() @@ -775,13 +783,14 @@ def create_symbol_handler_obj(self, mock_db): @patch(MODULE_DB_MANAGER, name="mock_db") def create_riskiq_obj(self, mock_db): riskiq = RiskIQ( - self.logger, - "dummy_output_dir", - 6379, - Mock(), # termination event - Mock(), # args - Mock(), # conf - Mock(), # ppid + logger=self.logger, + output_dir="dummy_output_dir", + redis_port=6379, + termination_event=Mock(), + slips_args=Mock(), + conf=Mock(), + ppid=Mock(), + bloom_filters_manager=Mock(), ) riskiq.db = mock_db return riskiq @@ -792,13 +801,14 @@ def create_timeline_object(self, mock_db): output_dir = "/tmp" redis_port = 6379 tl = Timeline( - logger, - output_dir, - redis_port, - Mock(), # termination event - Mock(), # args - Mock(), # conf - Mock(), # ppid + logger=logger, + output_dir=output_dir, + redis_port=redis_port, + termination_event=Mock(), + slips_args=Mock(), + conf=Mock(), + ppid=Mock(), + bloom_filters_manager=Mock(), ) tl.db = mock_db return tl diff --git a/tests/test_checker.py b/tests/test_checker.py index 7f7dedc383..c80a0b449e 100644 --- a/tests/test_checker.py +++ b/tests/test_checker.py @@ -126,6 +126,7 @@ def test_check_given_flags_root_user(monkeypatch): def test_check_input_type_interface(): checker = 
ModuleFactory().create_checker_obj() checker.main.args.interface = "eth0" + checker.main.args.growing = None checker.main.args.filepath = None checker.main.args.db = None checker.main.args.input_module = None @@ -134,6 +135,32 @@ def test_check_input_type_interface(): assert result == ("interface", "eth0", False) +def test_get_input_type_with_interface_and_growing(): + """ + Test get_input_type when both --interface and --growing are set. + Should trigger the first if condition and return (input_type, input_information, line_type). + """ + checker = ModuleFactory().create_checker_obj() + + # Arrange: set both interface and growing to trigger the first if + checker.main.args.interface = "eth0" + checker.main.args.growing = "/path/to/dir" + checker.main.args.filepath = None + checker.main.args.db = None + checker.main.args.input_module = None + checker.main.args.access_point = None + + # Mock get_input_file_type to return a predictable result + with patch.object( + checker.main, "get_input_file_type", return_value="zeek_folder" + ) as mock_get_type: + result = checker.get_input_type() + + # Assert + mock_get_type.assert_called_once_with("/path/to/dir") + assert result == ("zeek_folder", "/path/to/dir", False) + + def test_check_input_type_db(): checker = ModuleFactory().create_checker_obj() checker.main.args.interface = None diff --git a/tests/test_daemon.py b/tests/test_daemon.py index 0a4ea9219e..d7c3877395 100644 --- a/tests/test_daemon.py +++ b/tests/test_daemon.py @@ -222,7 +222,7 @@ def test_killdaemon(pid, os_kill_side_effect): daemon.pid = str(pid) with patch("os.kill", side_effect=os_kill_side_effect) as mock_kill: - daemon.killdaemon() + daemon._kill_self() mock_kill.assert_called_once_with(pid, signal.SIGTERM) diff --git a/tests/test_evidence_handler.py b/tests/test_evidence_handler.py index af47ea0cec..73498a8412 100644 --- a/tests/test_evidence_handler.py +++ b/tests/test_evidence_handler.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: GPL-2.0-only import pytest import os -from unittest.mock import Mock, MagicMock, patch, call +from unittest.mock import Mock, MagicMock, patch from slips_files.core.structures.alerts import Alert from slips_files.core.structures.evidence import ( @@ -185,20 +185,21 @@ def test_clean_file(output_dir, file_to_clean, file_exists): @pytest.mark.parametrize( "data", [ - # testcase1: Basic log entry "Test log entry", - # testcase2: Another log entry "Another log entry", ], ) def test_add_to_log_file(data): evidence_handler = ModuleFactory().create_evidence_handler_obj() - mock_file = Mock() - evidence_handler.logfile = mock_file + evidence_handler.evidence_logger_q.put = Mock() + + # Act evidence_handler.add_to_log_file(data) - assert mock_file.write.call_count == 2 - mock_file.write.assert_has_calls([call(data), call("\n")]) - mock_file.flush.assert_called_once() + + # Assert + evidence_handler.evidence_logger_q.put.assert_called_once_with( + {"to_log": data, "where": "alerts.log"} + ) @pytest.mark.parametrize( @@ -247,11 +248,18 @@ def test_add_alert_to_json_log_file( ) evidence_handler = ModuleFactory().create_evidence_handler_obj() evidence_handler.jsonfile = mock_file - evidence_handler.idmefv2.convert_to_idmef_alert = Mock(return_value=True) - with patch("json.dump") as mock_json_dump: - evidence_handler.add_alert_to_json_log_file(alert) - mock_json_dump.assert_called_once() - mock_file.write.assert_any_call("\n") + evidence_handler.idmefv2.convert_to_idmef_alert = Mock( + return_value="alert_in_idmef_format" + ) + 
evidence_handler.evidence_logger_q.put = Mock() + + evidence_handler.add_alert_to_json_log_file(alert) + evidence_handler.evidence_logger_q.put.assert_called_once_with( + { + "to_log": "alert_in_idmef_format", + "where": "alerts.json", + } + ) def test_show_popup(): diff --git a/tests/test_host_ip_manager.py b/tests/test_host_ip_manager.py index f53d1f9875..3fc4652b4e 100644 --- a/tests/test_host_ip_manager.py +++ b/tests/test_host_ip_manager.py @@ -67,89 +67,51 @@ def test_update_host_ip_should_update( assert host_ip_man.store_host_ip.call_count == expected_calls -@pytest.mark.parametrize( - "args_interface,args_access_point,iface_addrs,expected", - [ - # Single interface with valid IPv4 - ( - "eth0", - None, - {netifaces.AF_INET: [{"addr": "192.168.1.10"}]}, - {"eth0": "192.168.1.10"}, - ), - # Only loopback IP -> should be skipped - ( - "lo", - None, - {netifaces.AF_INET: [{"addr": "127.0.0.1"}]}, - {}, - ), - # Interface without AF_INET -> skipped - ( - "eth1", - None, - {}, - {}, - ), - ], -) @patch("netifaces.ifaddresses") -def test_get_host_ips_single_interface( - mock_ifaddresses, - args_interface, - args_access_point, - iface_addrs, - expected, -): - """Test _get_host_ips for single-interface cases.""" +def test_get_host_ips_single_interface(mock_ifaddresses): + """Test _get_host_ips when using a single interface via -i.""" host_ip_man = ModuleFactory().create_host_ip_manager_obj() - host_ip_man.main.args.growing = None - host_ip_man.main.args.interface = args_interface - host_ip_man.main.args.access_point = args_access_point + host_ip_man.main.args.interface = "eth0" + host_ip_man.main.args.access_point = None + + mock_ifaddresses.return_value = { + netifaces.AF_INET: [{"addr": "192.168.1.10"}] + } - mock_ifaddresses.return_value = iface_addrs result = host_ip_man._get_host_ips() - assert result == expected - mock_ifaddresses.assert_called_once_with(args_interface) + assert result == {"eth0": "192.168.1.10"} + mock_ifaddresses.assert_called_once_with("eth0") @patch("netifaces.ifaddresses") -def test_get_host_ips_multiple_interfaces_from_access_point(mock_ifaddresses): - """Test _get_host_ips when using multiple interfaces via --access-point.""" +def test_get_host_ips_ipv6_fallback(mock_ifaddresses): + """Test _get_host_ips uses IPv6 when no IPv4 is found.""" host_ip_man = ModuleFactory().create_host_ip_manager_obj() - host_ip_man.main.args.interface = None - host_ip_man.main.args.growing = None - host_ip_man.main.args.access_point = "wlan0,eth0" - - def mock_ifaddresses_side_effect(iface): - if iface == "wlan0": - return {netifaces.AF_INET: [{"addr": "10.0.0.5"}]} - elif iface == "eth0": - return {netifaces.AF_INET: [{"addr": "192.168.0.8"}]} - return {} + host_ip_man.main.args.interface = "wlan0" + host_ip_man.main.args.access_point = None - mock_ifaddresses.side_effect = mock_ifaddresses_side_effect + mock_ifaddresses.return_value = { + netifaces.AF_INET6: [{"addr": "fe80::1234:abcd%wlan0"}] + } result = host_ip_man._get_host_ips() - assert result == {"wlan0": "10.0.0.5", "eth0": "192.168.0.8"} + assert result == {"wlan0": "fe80::1234:abcd"} -def test_get_host_ips_growing_zeek_dir(mocker): - """Test _get_host_ips when using multiple interfaces via --access-point.""" +@patch("netifaces.ifaddresses") +def test_get_host_ips_skips_loopback(mock_ifaddresses): + """Test _get_host_ips ignores loopback addresses.""" host_ip_man = ModuleFactory().create_host_ip_manager_obj() - host_ip_man.main.args.interface = None - host_ip_man.main.args.growing = True + host_ip_man.main.args.interface 
= "lo" host_ip_man.main.args.access_point = None - host_ip_man._get_default_host_ip = Mock(return_value="10.0.0.5") - mocker.patch( - "slips_files.common.slips_utils.Utils.infer_used_interface", - return_value="eth0", - ) + mock_ifaddresses.return_value = { + netifaces.AF_INET: [{"addr": "127.0.0.1"}] + } result = host_ip_man._get_host_ips() - assert result == {"eth0": "10.0.0.5"} + assert result == {} @pytest.mark.parametrize( diff --git a/tests/test_process_manager.py b/tests/test_process_manager.py index 765aa73a44..125a198a6b 100644 --- a/tests/test_process_manager.py +++ b/tests/test_process_manager.py @@ -34,6 +34,7 @@ def test_start_input_process( process_manager.main.zeek_bro = zeek_or_bro process_manager.main.zeek_dir = zeek_dir process_manager.main.line_type = line_type + process_manager.main.bloom_filters_man = Mock() with patch("managers.process_manager.Input") as mock_input: mock_input_process = Mock() @@ -51,6 +52,7 @@ def test_start_input_process( process_manager.main.args, process_manager.main.conf, process_manager.main.pid, + process_manager.main.bloom_filters_man, is_input_done=process_manager.is_input_done, profiler_queue=process_manager.profiler_queue, input_type=input_type, @@ -394,6 +396,7 @@ def test_print_stopped_module(): def test_start_profiler_process(): process_manager = ModuleFactory().create_process_manager_obj() + process_manager.main.bloom_filters_man = Mock() with patch("managers.process_manager.Profiler") as mock_profiler: mock_profiler_process = Mock() mock_profiler.return_value = mock_profiler_process @@ -410,6 +413,7 @@ def test_start_profiler_process(): process_manager.main.args, process_manager.main.conf, process_manager.main.pid, + process_manager.main.bloom_filters_man, is_profiler_done=process_manager.is_profiler_done, profiler_queue=process_manager.profiler_queue, is_profiler_done_event=process_manager.is_profiler_done_event, @@ -432,6 +436,7 @@ def test_start_profiler_process(): ) def test_start_evidence_process(output_dir, redis_port): process_manager = ModuleFactory().create_process_manager_obj() + process_manager.main.bloom_filters_man = Mock() process_manager.main.args.output = output_dir process_manager.main.redis_port = redis_port @@ -451,6 +456,7 @@ def test_start_evidence_process(output_dir, redis_port): process_manager.main.args, process_manager.main.conf, process_manager.main.pid, + process_manager.main.bloom_filters_man, ) mock_evidence_process.start.assert_called_once() process_manager.main.print.assert_called_once() diff --git a/tests/test_whitelist.py b/tests/test_whitelist.py index 35ce8768a3..855c0bb47b 100644 --- a/tests/test_whitelist.py +++ b/tests/test_whitelist.py @@ -3,7 +3,7 @@ from tests.module_factory import ModuleFactory import pytest import json -from unittest.mock import MagicMock, patch, Mock +from unittest.mock import MagicMock, patch, Mock, mock_open from slips_files.core.structures.evidence import ( Direction, IoCType, @@ -115,50 +115,132 @@ def test_get_dst_domains_of_flow(flow_type, expected_result): @pytest.mark.parametrize( - "ip, org, org_ips, expected_result", + "ip, org, cidrs, mock_bf_octets, expected_result", [ - ("216.58.192.1", "google", {"216": ["216.58.192.0/19"]}, True), - ("8.8.8.8", "cloudflare", {"216": ["216.58.192.0/19"]}, False), - ("8.8.8.8", "google", {}, False), # no org ip info + # Case 1: Bloom filter hit, DB hit + ("216.58.192.1", "google", ["216.58.192.0/19"], ["216"], True), + # Case 2: Bloom filter hit, DB miss + ("8.8.8.8", "cloudflare", [], ["8"], False), + # Case 3: Bloom filter MISS 
+ # The 'ip' starts with "192", but we'll only put "10" in the filter + ("192.168.1.1", "my_org", [], ["10"], False), ], ) -def test_is_ip_in_org( +def test_is_ip_in_org_complete( ip, org, - org_ips, + cidrs, + mock_bf_octets, expected_result, ): whitelist = ModuleFactory().create_whitelist_obj() - whitelist.db.get_org_ips.return_value = org_ips - result = whitelist.org_analyzer.is_ip_in_org(ip, org) + analyzer = whitelist.org_analyzer + analyzer.bloom_filters = {org: {"first_octets": mock_bf_octets}} + + whitelist.db.is_ip_in_org_ips.return_value = cidrs + + result = analyzer.is_ip_in_org(ip, org) assert result == expected_result @pytest.mark.parametrize( - "domain, org, org_domains, expected_result", + "domain, org, mock_bf_domains, mock_db_exact, mock_db_org_list, " + "mock_tld_side_effect, expected_result", [ - ("www.google.com", "google", json.dumps(["google.com"]), True), - ("www.example.com", "google", json.dumps(["google.com"]), None), + # --- Case 1: Bloom Filter MISS --- + # The domain isn't even in the bloom filter. + ("google.com", "google", ["other.com"], None, None, None, False), + # --- Case 2: Bloom Filter HIT, DB Exact Match HIT --- + # BF hits, and db.is_domain_in_org_domains finds it. + ("google.com", "google", ["google.com"], True, None, None, True), + # --- Case 3: Subdomain Match (org_domain IN domain) --- + # 'google.com' (from db) is IN 'ads.google.com' (flow domain) + ( + "ads.google.com", + "google", + ["ads.google.com"], # 1. BF Hit + False, # 2. DB Exact Miss + ["google.com"], # 3. DB Org List + ["google.com", "google.com"], # 4. TLDs match (ads.google.com + # -> google.com, google.com -> google.com) + True, # 5. Expected: True + ), + # --- Case 4: Reverse Subdomain Match (domain IN org_domain) --- + # 'google.com' (flow domain) is IN 'ads.google.com' (from db) ( - "www.google.com", + "google.com", "google", - json.dumps([]), - None, - ), # no org domain info + ["google.com"], # 1. BF Hit + False, # 2. DB Exact Miss + ["ads.google.com"], # 3. DB Org List + ["google.com", "google.com"], # 4. TLDs match + True, # 5. Expected: True + ), + # --- Case 5: TLD Mismatch --- + # TLDs (google.net vs google.com) don't match, so 'continue' is hit. + ( + "google.net", + "google", + ["google.net"], # 1. BF Hit + False, # 2. DB Exact Miss + ["google.com"], # 3. org_domains + ["google.net", "google.com"], # 4. TLDs mismatch + False, # 5. Expected: False + ), + # --- Case 6: No Match (Falls through) --- + # TLDs match, but neither is a substring of the other. + ( + "evil-oogle.com", + "google", + ["evil-google.com"], # 1. BF should Hit + False, # 2. DB Exact Miss + ["google.com"], # 3. org_domains + ["google.com", "google.com"], # 4. TLDs match + False, # 5. 
Expected: False + ), ], ) def test_is_domain_in_org( domain, org, - org_domains, + mock_bf_domains, + mock_db_exact, + mock_db_org_list, + mock_tld_side_effect, expected_result, ): whitelist = ModuleFactory().create_whitelist_obj() - whitelist.db.get_org_info.return_value = org_domains - result = whitelist.org_analyzer.is_domain_in_org(domain, org) + analyzer = whitelist.org_analyzer + + analyzer.bloom_filters = {org: {"domains": mock_bf_domains}} + + whitelist.db.is_domain_in_org_domains.return_value = mock_db_exact + + whitelist.db.get_org_info.return_value = mock_db_org_list + # The first call is for 'domain', the second for 'org_domain' + if mock_tld_side_effect: + analyzer.domain_analyzer.get_tld = MagicMock( + side_effect=mock_tld_side_effect + ) + result = analyzer.is_domain_in_org(domain, org) assert result == expected_result +def test_is_domain_in_org_key_error(): + """ + Tests the 'try...except KeyError' block. + This happens if the 'org' isn't in the bloom_filters dict. + """ + whitelist = ModuleFactory().create_whitelist_obj() + analyzer = whitelist.org_analyzer + analyzer.bloom_filters = {} + # Accessing analyzer.bloom_filters["google"] will raise a KeyError, + # which should be caught and return False. + result = analyzer.is_domain_in_org("google.com", "google") + + assert not result + + @pytest.mark.parametrize( "is_whitelisted_victim, is_whitelisted_attacker, expected_result", [ @@ -188,14 +270,14 @@ def test_is_whitelisted_evidence( "b1:b1:b1:c1:c2:c3", Direction.SRC, False, - {"b1:b1:b1:c1:c2:c3": {"from": "src", "what_to_ignore": "alerts"}}, + {"from": "src", "what_to_ignore": "alerts"}, ), ( "5.6.7.8", "a1:a2:a3:a4:a5:a6", Direction.DST, True, - {"a1:a2:a3:a4:a5:a6": {"from": "dst", "what_to_ignore": "both"}}, + {"from": "dst", "what_to_ignore": "both"}, ), ("9.8.7.6", "c1:c2:c3:c4:c5:c6", Direction.SRC, False, {}), ], @@ -208,8 +290,15 @@ def test_profile_has_whitelisted_mac( whitelisted_macs, ): whitelist = ModuleFactory().create_whitelist_obj() + # act as it is present in the bloom filter + whitelist.bloom_filters.mac_addrs = mac_address + whitelist.db.get_mac_addr_from_profile.return_value = mac_address - whitelist.db.get_whitelist.return_value = whitelisted_macs + if whitelisted_macs: + whitelist.db.is_whitelisted.return_value = json.dumps(whitelisted_macs) + else: + whitelist.db.is_whitelisted.return_value = None + assert ( whitelist.mac_analyzer.profile_has_whitelisted_mac( profile_ip, direction, "both" @@ -237,14 +326,16 @@ def test_matching_direction(direction, whitelist_direction, expected_result): @pytest.mark.parametrize( "ioc_data, expected_result", [ + # Private IP should short-circuit -> False ( { "ioc_type": IoCType.IP, - "value": "1.2.3.4", + "value": "192.168.1.1", "direction": Direction.SRC, }, False, ), + # Domain belonging to whitelisted org -> True ( { "ioc_type": IoCType.DOMAIN, @@ -253,6 +344,7 @@ def test_matching_direction(direction, whitelist_direction, expected_result): }, True, ), + # Public IP not in whitelisted org -> False ( { "ioc_type": IoCType.IP, @@ -263,51 +355,62 @@ def test_matching_direction(direction, whitelist_direction, expected_result): ), ], ) -def test_is_part_of_a_whitelisted_org( - ioc_data, - expected_result, -): +def test_is_part_of_a_whitelisted_org(ioc_data, expected_result): whitelist = ModuleFactory().create_whitelist_obj() - whitelist.db.get_whitelist.return_value = { - "google": {"from": "both", "what_to_ignore": "both"} + whitelist.org_analyzer.whitelisted_orgs = { + "google": json.dumps({"from": "both", 
"what_to_ignore": "both"}) } - whitelist.db.get_org_info.return_value = json.dumps(["1.2.3.4/32"]) - whitelist.db.get_ip_info.return_value = {"asn": {"asnorg": "Google"}} - whitelist.db.get_org_info.return_value = json.dumps(["example.com"]) - # we're mocking either an attacker or a victim obj - mock_ioc = MagicMock() - mock_ioc.value = ioc_data["value"] - mock_ioc.direction = ioc_data["direction"] - mock_ioc.ioc_type = ioc_data["ioc_type"] - assert ( - whitelist.org_analyzer._is_part_of_a_whitelisted_org( - mock_ioc.value, mock_ioc.ioc_type, mock_ioc.direction, "both" - ) - == expected_result + # mock dependent methods + whitelist.org_analyzer.is_domain_in_org = MagicMock(return_value=True) + whitelist.org_analyzer.is_ip_part_of_a_whitelisted_org = MagicMock( + return_value=False ) + whitelist.match = MagicMock() + whitelist.match.direction.return_value = True + whitelist.match.what_to_ignore.return_value = True + + with patch( + "slips_files.core.helpers.whitelist.organization_whitelist." + "utils.is_private_ip", + return_value=False, + ): + result = whitelist.org_analyzer._is_part_of_a_whitelisted_org( + ioc=ioc_data["value"], + ioc_type=ioc_data["ioc_type"], + direction=ioc_data["direction"], + what_to_ignore="both", + ) + + assert result == expected_result + @pytest.mark.parametrize( - "dst_domains, src_domains, whitelisted_domains, expected_result", + "dst_domains, src_domains, whitelisted_domains, " + "is_whitelisted_return_vals, expected_result", [ ( ["dst_domain.net"], ["apple.com"], {"apple.com": {"from": "src", "what_to_ignore": "both"}}, + [False, True], True, ), ( - ["apple.com"], + ["apple.com"], # dst domains, shouldnt be whitelisted ["src.com"], {"apple.com": {"from": "src", "what_to_ignore": "both"}}, + [False, False], False, ), - (["apple.com"], ["src.com"], {}, False), # no whitelist found + (["apple.com"], ["src.com"], {}, [False, False], False), + # no whitelist found ( # no flow domains found [], [], {"apple.com": {"from": "src", "what_to_ignore": "both"}}, + [False, False], False, ), ], @@ -316,22 +419,19 @@ def test_check_if_whitelisted_domains_of_flow( dst_domains, src_domains, whitelisted_domains, + is_whitelisted_return_vals, expected_result, ): whitelist = ModuleFactory().create_whitelist_obj() + whitelist.bloom_filters.domains = list(whitelisted_domains.keys()) whitelist.db.get_whitelist.return_value = whitelisted_domains - whitelist.domain_analyzer.is_domain_in_tranco_list = Mock() - whitelist.domain_analyzer.is_domain_in_tranco_list.return_value = False - - whitelist.domain_analyzer.get_dst_domains_of_flow = Mock() - whitelist.domain_analyzer.get_dst_domains_of_flow.return_value = ( - dst_domains + whitelist.domain_analyzer.get_src_domains_of_flow = Mock( + return_value=src_domains ) - whitelist.domain_analyzer.get_src_domains_of_flow = Mock() - whitelist.domain_analyzer.get_src_domains_of_flow.return_value = ( - src_domains + whitelist.domain_analyzer.is_whitelisted = Mock( + side_effect=is_whitelisted_return_vals ) flow = Mock() @@ -344,6 +444,7 @@ def test_is_whitelisted_domain_not_found(): Test when the domain is not found in the whitelisted domains. 
""" whitelist = ModuleFactory().create_whitelist_obj() + whitelist.bloom_filters.domains = [] whitelist.db.get_whitelist.return_value = {} whitelist.db.is_whitelisted_tranco_domain.return_value = False domain = "nonwhitelisteddomain.com" @@ -379,9 +480,17 @@ def test_read_configuration( ) def test_ip_analyzer_is_whitelisted(ip, what_to_ignore, expected_result): whitelist = ModuleFactory().create_whitelist_obj() - whitelist.db.get_whitelist.return_value = { - "1.2.3.4": {"from": "both", "what_to_ignore": "both"} - } + whitelist.bloom_filters.ips = [ip] # Simulate presence in bloom + # filter, because we wanna test the rest of the logic + + # only this ip is whitelisted + if ip == "1.2.3.4": + whitelist.db.is_whitelisted.return_value = json.dumps( + {"from": "both", "what_to_ignore": "both"} + ) + else: + whitelist.db.is_whitelisted.return_value = None + assert ( whitelist.ip_analyzer.is_whitelisted(ip, Direction.SRC, what_to_ignore) == expected_result @@ -485,47 +594,67 @@ def test_is_whitelisted_entity_victim( @pytest.mark.parametrize( - "org, expected_result", + "org, file_content, expected_result", [ - ("google", ["google.com", "google.co.uk"]), - ("microsoft", ["microsoft.com", "microsoft.net"]), + ( + "google", + "google.com\ngoogle.co.uk\n", + ["google.com", "google.co.uk"], + ), + ( + "microsoft", + "microsoft.com\nmicrosoft.net\n", + ["microsoft.com", "microsoft.net"], + ), ], ) -def test_load_org_domains( - org, - expected_result, -): +def test_load_org_domains(org, file_content, expected_result): whitelist = ModuleFactory().create_whitelist_obj() whitelist.db.set_org_info = MagicMock() - actual_result = whitelist.parser.load_org_domains(org) - for domain in expected_result: - assert domain in actual_result + # Mock the file open for reading org domains + with patch("builtins.open", mock_open(read_data=file_content)): + actual_result = whitelist.parser.load_org_domains(org) - assert len(actual_result) >= len(expected_result) - whitelist.db.set_org_info.assert_called_with( - org, json.dumps(actual_result), "domains" + # Check contents + assert actual_result == expected_result + whitelist.db.set_org_info.assert_called_once_with( + org, expected_result, "domains" ) @pytest.mark.parametrize( - "domain, direction, expected_result", + "domain, direction, is_whitelisted_return, expected_result", [ - ("example.com", Direction.SRC, True), - ("test.example.com", Direction.DST, True), - ("malicious.com", Direction.SRC, False), + ( + "example.com", + Direction.SRC, + {"from": "both", "what_to_ignore": "both"}, + True, + ), + ( + "test.example.com", + Direction.DST, + {"from": "both", "what_to_ignore": "both"}, + True, + ), + ("malicious.com", Direction.SRC, {}, False), ], ) def test_is_domain_whitelisted( domain, direction, + is_whitelisted_return, expected_result, ): whitelist = ModuleFactory().create_whitelist_obj() - whitelist.db.get_whitelist.return_value = { - "example.com": {"from": "both", "what_to_ignore": "both"} - } + whitelist.db.is_whitelisted.return_value = json.dumps( + is_whitelisted_return + ) + whitelist.db.is_whitelisted_tranco_domain.return_value = False + whitelist.bloom_filters.domains = ["example.com"] + for type_ in ("alerts", "flows"): result = whitelist.domain_analyzer.is_whitelisted( domain, direction, type_ @@ -539,36 +668,36 @@ def test_is_domain_whitelisted( ( "8.8.8.8", "google", - json.dumps(["AS6432"]), + ["AS6432"], {"asn": {"number": "AS6432"}}, True, ), ( "1.1.1.1", "cloudflare", - json.dumps(["AS6432"]), + ["AS6432"], {"asn": {"number": "AS6432"}}, True, ), 
( "8.8.8.8", "Google", - json.dumps(["AS15169"]), + ["AS15169"], {"asn": {"number": "AS15169", "asnorg": "Google"}}, True, ), ( "1.1.1.1", "Cloudflare", - json.dumps(["AS13335"]), + ["AS13335"], {"asn": {"number": "AS15169", "asnorg": "Google"}}, False, ), - ("9.9.9.9", "IBM", json.dumps(["AS36459"]), {}, None), + ("9.9.9.9", "IBM", ["AS36459"], {}, False), ( "9.9.9.9", "IBM", - json.dumps(["AS36459"]), + ["AS36459"], {"asn": {"number": "Unknown"}}, False, ), @@ -578,80 +707,15 @@ def test_is_ip_asn_in_org_asn( ip, org, org_asn_info, ip_asn_info, expected_result ): whitelist = ModuleFactory().create_whitelist_obj() - whitelist.db.get_org_info.return_value = org_asn_info + + whitelist.db = MagicMock() whitelist.db.get_ip_info.return_value = ip_asn_info - assert ( - whitelist.org_analyzer.is_ip_asn_in_org_asn(ip, org) == expected_result - ) + whitelist.db.get_org_info.return_value = org_asn_info + ip_asn = ip_asn_info.get("asn", {}).get("number", None) + whitelist.org_analyzer._is_asn_in_org = MagicMock( + return_value=ip_asn in org_asn_info + ) -# TODO for sekhar -# @pytest.mark.parametrize( -# "flow_data, whitelist_data, expected_result", -# [ -# ( # testing_is_whitelisted_flow_with_whitelisted_organization_ -# # but_ip_or_domain_not_whitelisted -# MagicMock(saddr="9.8.7.6", daddr="5.6.7.8", type_="http", host="org.com"), -# {"organizations": {"org": {"from": "both", "what_to_ignore": "flows"}}}, -# False, -# ), -# ( # testing_is_whitelisted_flow_with_non_whitelisted_organizatio -# # n_but_ip_or_domain_whitelisted -# MagicMock( -# saddr="1.2.3.4", -# daddr="5.6.7.8", -# type_="http", -# host="whitelisted.com", -# ), -# {"IPs": {"1.2.3.4": {"from": "src", "what_to_ignore": "flows"}}}, -# False, -# ), -# ( # testing_is_whitelisted_flow_with_whitelisted_source_ip -# MagicMock( -# saddr="1.2.3.4", -# daddr="5.6.7.8", -# type_="http", -# server_name="example.com", -# ), -# {"IPs": {"1.2.3.4": {"from": "src", "what_to_ignore": "flows"}}}, -# False, -# ), -# ( # testing_is_whitelisted_flow_with_both_source_and_destination_ips_whitelisted -# MagicMock(saddr="1.2.3.4", daddr="5.6.7.8", type_="http"), -# { -# "IPs": { -# "1.2.3.4": {"from": "src", "what_to_ignore": "flows"}, -# "5.6.7.8": {"from": "dst", "what_to_ignore": "flows"}, -# } -# }, -# False, -# ), -# ( -# # testing_is_whitelisted_flow_with_whitelisted_mac_address_but_ip_not_whitelisted -# MagicMock( -# saddr="9.8.7.6", -# daddr="1.2.3.4", -# smac="b1:b1:b1:c1:c2:c3", -# dmac="a1:a2:a3:a4:a5:a6", -# type_="http", -# server_name="example.org", -# ), -# { -# "mac": { -# "b1:b1:b1:c1:c2:c3": { -# "from": "src", -# "what_to_ignore": "flows", -# } -# } -# }, -# False, -# ), -# ], -# ) -# def test_is_whitelisted_flow( flow_data, whitelist_data, expected_result): -# """ -# Test the is_whitelisted_flow method with various combinations of flow data and whitelist data. -# """ -# whitelist.db.get_all_whitelist.return_value = whitelist_data -# whitelist = ModuleFactory().create_whitelist_obj() -# assert whitelist.is_whitelisted_flow(flow_data) == expected_result + result = whitelist.org_analyzer.is_ip_asn_in_org_asn(ip, org) + assert result == expected_result