diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..90d285f --- /dev/null +++ b/.env.example @@ -0,0 +1,15 @@ +# Copy to .env and fill in secrets +VT_API_KEY= +# Optional AbuseIPDB (https://www.abuseipdb.com/) API key +ABUSEIPDB_API_KEY= +# Or multiple keys (comma-separated) to distribute load respectfully +ABUSEIPDB_API_KEYS= +# Optional outbound proxies (comma-separated). Examples: +# PROXY_LIST=http://user:pass@1.2.3.4:8080,https://5.6.7.8:8443,socks5://9.9.9.9:1080 +PROXY_LIST= +# GROQ (LLM) optional keys (comma-separated) +GROQ_API_KEYS= +# Optional path to a newline-separated list of known-bad IPs (offline escalation) +OFFLINE_IP_BLOCKLIST= +# Optional token budget (not required for this tool) +GROQ_TOKENS_BUDGET= diff --git a/.gitignore b/.gitignore index 968643b..1d48f4d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,41 +1,15 @@ -# Environments -.venv/ -env/ -.env -.env.* - -# Python __pycache__/ -*.pyc - -# Data -data/raw/* -!data/raw/.gitkeep -data/processed/* -!data/processed/.gitkeep - -# Cache -data/cache/* -!data/cache/.gitkeep - -# Tool caches / reports .pytest_cache/ .mypy_cache/ -.ruff_cache/ -htmlcov/ -.coverage -coverage.xml - -# Notebooks -*.ipynb_checkpoints/ - -# OS -.DS_Store -Thumbs.db - +.venv/ +.env +data/cache/*.json +data/processed/* +data/raw/* +docs/*.mp4 .coverage -.pytest_cache/ -.mypy_cache/ -.ruff_cache/ -htmlcov/ -coverage.xml +.debug/ +*Zone.Identifier +*.pyc +*.pyo +*.DS_Store diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..570d8c0 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "workbench.colorCustomizations": { + "terminal.background": "#00000000", + "minimap.background": "#00000000", + "scrollbar.shadow": "#00000000" + } +} \ No newline at end of file diff --git a/AGENTS.md b/AGENTS.md index 103eb79..d56fd24 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,37 +1,48 @@ # Repository Guidelines +This AGENTS.md guides both human and agent 
contributors. Its scope is the entire repository. + ## Project Structure & Module Organization -- Root: currently contains `access_log.txt` and project PDFs. Move long‑form write‑ups to `docs/` and raw logs to `data/raw/` as the repo evolves. -- `src/`: Python modules for log parsing, enrichment, and CTI lookups (e.g., `src/parsers/`, `src/enrichers/`). -- `notebooks/`: exploratory analysis; keep outputs cleared before commit. -- `tests/`: unit tests mirroring `src/` layout (e.g., `tests/parsers/test_nginx.py`). -- `docs/`: reports, diagrams, and usage notes. +- Root: currently `access_log.txt` and project PDFs; move long‑form write‑ups to `docs/` and raw logs to `data/raw/` as the repo evolves. +- `src/`: Python modules for parsing, scoring, enrichment, and CTI (e.g., `src/parsers/`, `src/enrichers/`). +- `notebooks/`: exploratory analysis; clear outputs before commit. +- `tests/`: unit tests mirroring `src/` (e.g., `tests/parsers/test_nginx.py`); fixtures in `tests/fixtures/`. +- `docs/`: reports, diagrams, usage notes. `data/`: `raw/` inputs; `cache/` (e.g., `data/cache/cti_cache.json`). ## Build, Test, and Development Commands -- Create env: `python -m venv .venv && source .venv/bin/activate`. -- Install deps: `pip install -r requirements.txt` (add one if code is introduced). -- Run tests: `pytest -q`. -- Lint/format: `ruff check . && ruff format .` (or `black . && isort .` if preferred). -- Type check: `mypy src`. +- Create env: `python -m venv .venv && source .venv/bin/activate` +- Install deps: `pip install -r requirements.txt` +- Lint/format: `ruff check . && ruff format .` (or `black . && isort .`) +- Type check: `mypy src` +- Run tests: `pytest -q` (coverage: `pytest --cov=src`) +- Run UI: `streamlit run src/ui/streamlit_app.py` + - One‑click: `./run.sh setup`, `./run.sh scan `, or `./run.sh ui` ## Coding Style & Naming Conventions -- Python 3.10+; 4‑space indentation; UTF‑8. +- Python 3.10+, UTF‑8, 4‑space indentation. 
- Names: modules/functions `lower_snake_case`, classes `PascalCase`, constants `UPPER_SNAKE_CASE`. - Files: logs `data/raw/YYYYMMDD_source.log`; notebooks `notebooks/_.ipynb`. - Keep functions <50 lines where practical; document public functions with docstrings. ## Testing Guidelines -- Framework: `pytest`; minimum 80% coverage measured via `pytest --cov=src`. -- Layout: mirror `src/` with `test_*.py`; use fixtures for sample logs under `tests/fixtures/`. -- Determinism: do not read network in tests; mock CTI APIs. +- Framework: `pytest`; mirror `src/` layout; fixtures under `tests/fixtures/`. +- Determinism: no network in tests; mock CTI/LLM calls; target ≥80% coverage. ## Commit & Pull Request Guidelines - Commits: Conventional Commits (e.g., `feat(parser): add nginx status extraction`). -- PRs: concise summary, linked issue, before/after notes, and if UI/data changes, include a small sample input and expected output. -- Size: prefer ≤300 lines diff; split larger changes. +- PRs: concise summary, linked issue, before/after notes; if UI/data changes, include small sample input and expected output. Prefer ≤300 lines diff. ## Security & Data Handling -- Do not commit secrets or tokens; use `.env` and provide `.env.example`. -- Anonymize or truncate sensitive log data before committing. -- Large files: store raw datasets outside git or via LFS; keep only small, representative fixtures. +- Never commit secrets; use `.env` and provide `.env.example`. +- Anonymize/truncate sensitive logs; store large datasets outside git or via LFS. + - API keys: `VT_API_KEY`, `ABUSEIPDB_API_KEY` (optional). Respect rate limits. +## Scalable, Budget‑Aware Processing (Project‑Specific) +- Offline‑first; aggregate then sample; cache and dedupe. Defaults: `--llm-group-by ip`, `--llm-sample 200`, `--cti-scope suspicious` with `--cti-max 200`. + - Offline‑first; aggregate then sample; cache and dedupe. 
Defaults: `--llm-group-by ip`, `--llm-sample 200`, `--cti-scope suspicious` with `--cti-max 200` (use `--cti-max -1` to scan all IPs). +- Budget throttle: `export GROQ_TOKENS_BUDGET=150000`. +- Examples: + - Huge logs: `python -m src.cli data/raw/big.log --out data/processed --llm-group-by ip --llm-sample 200 --cti-scope suspicious --cti-max 200 --color never` + - Strictly offline: `python -m src.cli data/raw/big.log --out data/processed --no-llm --no-cti --no-reports` + - IP scan to PDF (CLI): `python -m src.cli scan-ips data/sample_ips.txt --out data/processed --cti-max -1` + - IP scan (UI): `streamlit run src/ui/streamlit_app.py` diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..1648b50 --- /dev/null +++ b/Makefile @@ -0,0 +1,52 @@ +VENV?=.venv +PY?=$(VENV)/bin/python +PIP?=$(VENV)/bin/pip + +.PHONY: setup install lint fmt test scan scan-all ui help doctor scan-file + +setup: + python -m venv $(VENV) + . $(VENV)/bin/activate; $(PIP) install -r requirements.txt + +install: + . $(VENV)/bin/activate; $(PIP) install -r requirements.txt + +lint: + . $(VENV)/bin/activate; $(PY) -m ruff check . || true + +fmt: + . $(VENV)/bin/activate; $(PY) -m ruff format . || true + +test: + . $(VENV)/bin/activate; pytest -q || true + +scan: + . $(VENV)/bin/activate; $(PY) -m src.cli scan-ips data/sample_ips.txt --out data/processed --no-cti + +scan-all: + . $(VENV)/bin/activate; $(PY) -m src.cli scan-ips data/sample_ips.txt --out data/processed --cti-max -1 + +ui: + . 
$(VENV)/bin/activate; streamlit run src/ui/streamlit_app.py + +help: + @echo "Targets:" && \ + echo " make setup # Create venv and install deps" && \ + echo " make scan # Offline demo scan -> PDF" && \ + echo " make scan-all # Demo scan with CTI (uses VT_API_KEY)" && \ + echo " make scan-file FILE=path CTI_MAX=-1 RATE=0.8 BURST=1 SAVE=50 # Full control" && \ + echo " make ui # Launch Streamlit UI" && \ + echo " make lint|fmt|test" && \ + echo " make doctor # Quick environment check" + +doctor: + @python3 -c 'import sys; print("Python:", sys.version.split()[0]); assert sys.version_info[:2] >= (3,10)' || (echo "Python 3.10+ required" && exit 1) + @[ -f .env ] || echo "Note: .env not found (optional). Copy .env.example -> .env" + @[ -n "$$VT_API_KEY" ] || echo "Note: VT_API_KEY not set; CTI calls will be disabled." + +# Example: make scan-file FILE=data/sample_ips.txt CTI_MAX=-1 RATE=0.8 BURST=1 SAVE=25 +scan-file: + @[ -n "$(FILE)" ] || (echo "Usage: make scan-file FILE=path [CTI_MAX=-1] [RATE=0.8] [BURST=1] [SAVE=50]" && exit 1) + . $(VENV)/bin/activate; \ + $(PY) -m src.cli scan-ips $(FILE) --out data/processed --cti-max $${CTI_MAX:--1} \ + --cti-rate $${RATE:-0.8} --cti-burst $${BURST:-1} --save-every $${SAVE:-50} diff --git a/README.md b/README.md new file mode 100644 index 0000000..bcf4c4c --- /dev/null +++ b/README.md @@ -0,0 +1,139 @@ +# LogCTIAI — Offline‑First Log Analysis + CTI (LLM‑Optional) + +Bu layihə (AZ): böyük həcmli server/web loglarını emal edir, qruplaşdırılmış LLM şərhləri (istəyə görə) və CTI zənginləşdirməsi ilə təhlükə siqnallarını çıxarır, nəticədə yığcam və təkrarlana bilən hesabatlar yaradır. Şəbəkədən minimal istifadə və büdcə nəzarəti üçün optimallaşdırılıb. + +This project ingests large web/server logs, enriches events with optional LLM analysis, performs CTI lookups against external sources, and generates concise human‑readable reports. 
It is designed to run reliably on very large datasets with minimal network usage: + +- Auto‑detects `.txt` vs `.log` inputs; parses recognized log lines in `.txt` files. +- Minimizes LLM calls via grouping, sampling, and gates; enforces an optional token budget. +- Minimizes CTI calls via suspicious‑first scoping, caps, batching, and strong caching. +- Works fully offline and degrades gracefully when network or budgets are unavailable. + +See `docs/USAGE.md` for practical commands and tips. See `AGENTS.md` for project conventions and the scalable processing strategy. + +![Mindmap](docs/ProjectMindmapv0.5.png) + +## Quickstart + +- Create env: `python -m venv .venv && source .venv/bin/activate` +- Install deps: `pip install -r requirements.txt` +- Run on a log (auto‑detects `.txt` files that look like logs): + - `python -m src.cli data/raw/access_log.txt --out data/processed --summary --preview 3` + - Outputs `data/processed/access_log.jsonl` and `data/processed/reports/` with `.txt` and `.md`. + +### IP Threat Scanner (CLI & UI) + +This repo also includes a fast, offline‑first IP CTI scanner with caching, PDF/JSON/CSV outputs, and a Streamlit UI. 
+ +- CLI (offline demo): `python -m src.cli scan-ips data/sample_ips.txt --out data/processed --no-cti` +- CLI (with CTI): `VT_API_KEYS=vt_key1,vt_key2 ABUSEIPDB_API_KEYS=ab1,ab2 python -m src.cli scan-ips data/sample_ips.txt --out data/processed --cti-max 200 --cti-rate 1 --cti-burst 1 --workers 2` +- UI: `streamlit run src/ui/streamlit_app.py` (clean UI with optional AI executive summary embedded in the exported PDF) + +Environment (see `.env.example`): +- VirusTotal: `VT_API_KEY` or `VT_API_KEYS` (comma‑separated) +- AbuseIPDB: `ABUSEIPDB_API_KEY` or `ABUSEIPDB_API_KEYS` (comma‑separated) +- Optional proxies (resiliency, not for evading quotas): `PROXY_LIST="http://1.2.3.4:8080,socks5://5.6.7.8:1080"` +- Offline blocklist: `OFFLINE_IP_BLOCKLIST=/path/to/bad_ips.txt` + +Notes: +- The scanner respects provider rate limits and `Retry-After`; it rotates your keys and proxies on 429/403 and caches results. +- VirusTotal has no API‑less access; provide an API key to query VT. + +If LLM keys are not configured, enrichment runs offline with `severity=unknown` placeholders and continues to produce reports. + +## CLI Overview + +`python -m src.cli <input> --out <out_dir> [options]` + +Common options: + +- `--verbose quiet|normal|max`: control console verbosity (default: `max`). +- `--no-llm`: disable LLM enrichment (default if no keys set). +- `--no-cti`: skip CTI lookups; run fully offline. +- `--no-reports`: skip generating text/markdown reports. +- `--limit N`: process only the first N lines. +- `--format jsonl|csv`: output for enriched events (default: `jsonl`). +- `--color auto|always|never`: terminal color policy. +- `--ai-malicious-report`: after CTI summarization, ask the LLM for a detailed malicious-activity report (saved under `reports/`). + +LLM request control: + +- `--llm-group-by none|ip|signature`: group before LLM calls (default: `ip`); `signature` groups by `ip+path+status+ua`. +- `--group-window SECONDS`: add a time bucket to grouping (e.g., `60`). 
+- `--llm-sample N`: send only N groups to LLM; the rest are annotated as sampled/gated out (default: `200`). +- `--llm-gate-4xx N`: only send groups with ≥N 4xx responses. +- `--llm-gate-ua`: only send groups with suspicious user‑agents. + +CTI request control: + +- `--cti-scope suspicious|all`: lookup only suspicious IPs (default) or all IPs. +- `--cti-max N`: cap number of IPs to query for CTI (0=unlimited; default: `100`). +- `--cti-batch-size N`, `--cti-batch-pause S`: batch CTI queries and pause between batches; cache flushes periodically. + +Examples (large logs): + +- Minimal network usage: + - `python -m src.cli data/raw/big.log --out data/processed --llm-group-by ip --group-window 60 --llm-gate-4xx 5 --llm-sample 200 --cti-scope suspicious --cti-max 200` +- Strictly offline (fastest): + - `python -m src.cli data/raw/big.log --out data/processed --no-llm --no-cti --no-reports` + +## Environment + +Create a `.env` (see variables below). Keys are optional; the tool runs offline without them. + +- `GROQ_API_KEYS`: comma‑separated LLM keys for rotation. +- `GROQ_MODEL`: Groq model name (default `llama3-8b-8192`). +- `GROQ_TOKENS_BUDGET`: approximate token budget per run/day; enrichment stops before the cap and continues offline. +- `RISK_4XX_THRESHOLD`: per‑IP 4xx threshold to consider suspicious in reports (default `5`). +- `SUSPICIOUS_UA_REGEX`: comma‑separated regex patterns to flag suspicious UAs. +- VirusTotal: `VT_API_KEY` (single) or `VT_API_KEYS` (comma‑separated). +- AbuseIPDB: `ABUSEIPDB_API_KEY` (single) or `ABUSEIPDB_API_KEYS` (comma‑separated). +- Proxies: `PROXY_LIST` comma‑separated list of `http://`, `https://`, or `socks5://` URLs. +- `VT_API_KEY`: VirusTotal API key (optional; CTI works in a degraded mode without it). +- `OFFLINE_IP_BLOCKLIST`: path to a newline‑separated list of known‑bad IPs to escalate risk without CTI calls. 
+ +Budget notes: +- When available, the client uses model‑reported token usage; otherwise it falls back to a conservative character‑based estimate. + +## Outputs + +- Enriched events: `data/processed/<input-name>.jsonl` (or `.csv` with `--format csv`). +- Reports: `data/processed/reports/report.txt` and `report.md` summarizing activity and suspicious IPs; may include a brief AI note if LLM is enabled. +- Malicious AI report (optional): `data/processed/reports/malicious_ai_report.txt|md` if `--ai-malicious-report` is used and malicious CTI signals are present. +- CTI cache: `data/cache/cti_cache.json` (auto‑created and reused to minimize network calls). + +## Testing + +- Run tests: `pytest -q` +- Optional coverage: `pytest --cov=src -q` (if coverage plugin installed). + +Notes: +- If you used the local venv above, run tests via `.venv/bin/pytest -q`. +- A PyPDF2 deprecation warning may appear; it’s harmless and can be ignored. + +## UI Dashboard + +An optional Streamlit dashboard is included for exploration and client-friendly viewing. + +- Install UI deps (already part of `requirements.txt`). +- Run the UI: `scripts/run_ui.sh` (or `streamlit run ui/app.py`). +- Select an enriched `.jsonl` file from `data/processed/` or upload one. +- View status distribution, sample enriched events, and CTI attributes. + +## Troubleshooting + +- `.txt` auto‑detection: the CLI reads a small sample and parses with `parse_line`. If none match, the file is copied as plain text rather than parsed as logs. +- LLM budget exceeded: you’ll see `LLM budget exhausted` in logs; records are still produced with `severity=unknown` and a rationale explaining sampling/gating. +- CTI failures: the pipeline continues with cached/partial data; use `--no-cti` for fully offline runs. Consider `--cti-max` and batching to avoid rate limits. +- No colors or CI: pass `--color never` for consistent, plain output. 
+ +## Docs + +- Usage guide with more examples: `docs/USAGE.md` +- Principles, strategy, and repo conventions: `AGENTS.md` +- Mindmap/diagram: `docs/ProjectMindmapv0.5.png` +- Project write‑ups: `docs/Final Project - Log Analysis + CTI.pdf` + +--- + +Made with a focus on reliability, scalability, and cost‑awareness. diff --git a/data/assets/flags/FR.png b/data/assets/flags/FR.png new file mode 100644 index 0000000..dff446b Binary files /dev/null and b/data/assets/flags/FR.png differ diff --git a/data/assets/flags/US.png b/data/assets/flags/US.png new file mode 100644 index 0000000..dff446b Binary files /dev/null and b/data/assets/flags/US.png differ diff --git a/data/processed-test/access_log.jsonl b/data/processed-test/access_log.jsonl new file mode 100644 index 0000000..90a2dfd --- /dev/null +++ b/data/processed-test/access_log.jsonl @@ -0,0 +1,185 @@ +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/fan_facemask.jpg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/melon_bike.jpeg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/lemon_juice.jpg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", 
"method": "GET", "path": "/assets/public/images/products/permafrost.jpg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/green_smoothie.jpg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/fruit_press.jpg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/eggfruit_juice.jpg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/carrot_juice.jpeg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/artwork2.jpg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 
Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/socket.io/?EIO=4&transport=polling&t=PZvmEN_&sid=dCq-gfyMWN1lgOZ1AAAC", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/banana_juice.jpg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/apple_pressings.jpg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/apple_juice.jpg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/socket.io/?EIO=4&transport=polling&t=PZvmELD&sid=dCq-gfyMWN1lgOZ1AAAC", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": 
"GET", "path": "/api/Quantitys/", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/rest/products/search?q=", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/api/Challenges/?name=Score%20Board", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/api/Challenges/?name=Score%20Board", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/rest/languages", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/socket.io/?EIO=4&transport=polling&t=PZvmEGF&sid=dCq-gfyMWN1lgOZ1AAAC", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": 
"18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "POST", "path": "/socket.io/?EIO=4&transport=polling&t=PZvmEG8&sid=dCq-gfyMWN1lgOZ1AAAC", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/rest/admin/application-configuration", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/JuiceShop_Logo.png", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/rest/admin/application-configuration", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:44+00:00", "method": "GET", "path": "/rest/admin/application-version", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:44+00:00", "method": "GET", "path": "/rest/admin/application-configuration", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) 
Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:44+00:00", "method": "GET", "path": "/rest/admin/application-version", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:44+00:00", "method": "GET", "path": "/socket.io/?EIO=4&transport=polling&t=PZvmECK", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:44+00:00", "method": "GET", "path": "/assets/i18n/en.json", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:44+00:00", "method": "GET", "path": "/styles.css", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:44+00:00", "method": "GET", "path": "/main.js", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:44+00:00", "method": "GET", "path": "/vendor.js", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 
Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:44+00:00", "method": "GET", "path": "/polyfills.js", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:44+00:00", "method": "GET", "path": "/runtime.js", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/socket.io/?EIO=4&transport=websocket&sid=JMIIi7MdmkRu-2SAAAAA", "proto": "TLSv1.3", "status": 101, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/fan_facemask.jpg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/melon_bike.jpeg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 
(X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/lemon_juice.jpg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/permafrost.jpg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/green_smoothie.jpg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/fruit_press.jpg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/eggfruit_juice.jpg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": 
"GET", "path": "/assets/public/images/products/carrot_juice.jpeg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/banana_juice.jpg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/artwork2.jpg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/apple_pressings.jpg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/apple_juice.jpg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/socket.io/?EIO=4&transport=polling&t=PZvmDs7&sid=JMIIi7MdmkRu-2SAAAAA", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 
Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/api/Quantitys/", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/rest/products/search?q=", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/api/Challenges/?name=Score%20Board", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/api/Challenges/?name=Score%20Board", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/MaterialIcons-Regular.woff2", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/socket.io/?EIO=4&transport=polling&t=PZvmDh-&sid=JMIIi7MdmkRu-2SAAAAA", "proto": "TLSv1.3", "status": 200, "size": null, "ref": 
null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "POST", "path": "/socket.io/?EIO=4&transport=polling&t=PZvmDhz&sid=JMIIi7MdmkRu-2SAAAAA", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/rest/languages", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:42+00:00", "method": "GET", "path": "/assets/public/images/JuiceShop_Logo.png", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:42+00:00", "method": "GET", "path": "/rest/admin/application-configuration", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:42+00:00", "method": "GET", "path": "/rest/admin/application-configuration", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:42+00:00", "method": "GET", "path": 
"/rest/admin/application-version", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:42+00:00", "method": "GET", "path": "/rest/admin/application-configuration", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:42+00:00", "method": "GET", "path": "/rest/admin/application-version", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:41+00:00", "method": "GET", "path": "/assets/i18n/en.json", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:41+00:00", "method": "GET", "path": "/socket.io/?EIO=4&transport=polling&t=PZvmDQz", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:41+00:00", "method": "GET", "path": "/assets/public/favicon_js.ico", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": 
"2025-08-30T06:53:40+00:00", "method": "GET", "path": "/styles.css", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:40+00:00", "method": "GET", "path": "/vendor.js", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:40+00:00", "method": "GET", "path": "/main.js", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:40+00:00", "method": "GET", "path": "/polyfills.js", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:40+00:00", "method": "GET", "path": "/runtime.js", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:39+00:00", "method": "GET", "path": "/", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:16+00:00", "method": "INDEX", "path": "/", "proto": "", "status": 
301, "size": null, "ref": null, "ua": "nikto", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:16+00:00", "method": "", "path": "", "proto": "", "status": 400, "size": null, "ref": null, "ua": null, "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:15+00:00", "method": "TRACK", "path": "/", "proto": "", "status": 301, "size": null, "ref": null, "ua": "nikto", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:15+00:00", "method": "TRACK", "path": "/", "proto": "", "status": 301, "size": null, "ref": null, "ua": "nikto", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:15+00:00", "method": "TRACE", "path": "/", "proto": "", "status": 405, "size": null, "ref": null, "ua": "nikto", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:15+00:00", "method": "TRACE", "path": "/", "proto": "", "status": 405, "size": null, "ref": null, "ua": "nikto", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:15+00:00", "method": "PROPFIND", "path": "/", "proto": "", "status": 400, "size": null, "ref": null, "ua": "nikto", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:15+00:00", "method": "DEBUG", "path": "/", "proto": "", "status": 301, "size": null, "ref": null, "ua": "nikto", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:15+00:00", "method": "NVFYPLCD", "path": "/", "proto": "", "status": 301, "size": null, "ref": null, "ua": "nikto", "severity": "unknown", 
"iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:15+00:00", "method": "OPTIONS", "path": "/", "proto": "", "status": 301, "size": null, "ref": null, "ua": "nikto", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:15+00:00", "method": "", "path": "", "proto": "", "status": 400, "size": null, "ref": null, "ua": null, "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:15+00:00", "method": "", "path": "", "proto": "", "status": 400, "size": null, "ref": null, "ua": null, "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:15+00:00", "method": "PUT", "path": "/nikto-test-dfmcL5fa.html", "proto": "", "status": 301, "size": null, "ref": null, "ua": "nikto", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/group", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/grid", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/greybox", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/green", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": 
["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/Graphics", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/graphics", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/graph", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/grants", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/granted", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/grant", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/grafik", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gracias", "proto": "TLSv1.3", "status": 200, "size": null, "ref": 
null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gr", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gps", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gprs", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gpl", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gpapp", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gp", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/government", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/goto", "proto": "TLSv1.3", 
"status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/googlebot", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/google_sitemap", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/google", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/goods_script", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/goods", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gone", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/golf", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": 
"2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gold", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/goaway", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/go", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/glossary", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/globes_admin", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/globals", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/globalnav", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/global.asax", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], 
"rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/global.asa", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/Global", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/global", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/glimpse", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/glance_config", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gl", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gitweb", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/git", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": 
"dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gifts", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/giftregs", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/giftreg_manage", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/giftoptions", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/giftcert", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gift", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gifs", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gif", "proto": 
"TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gid", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gfx", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gfen", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gettxt", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/getout", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/getjobid", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", 
"method": "GET", "path": "/getFile.cfm", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/get-file", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/getfile", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/getconfig", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/getaccess", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/get_file", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/get", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gestione", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM 
disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gestion", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gest", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/geronimo", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/german", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/geoip", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/geo", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gentoo", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/generic", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": 
"unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/generator", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/generateditems", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/general", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gen", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/geeklog", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gdform", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gccallback", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gbook", "proto": "TLSv1.3", "status": 
200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gb", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gateway", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gate", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/garbage", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/ganglia", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gaming", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/Games", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", 
"path": "/games", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gamercard", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/game", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gallery2", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gallery", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/galleries", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/galerie", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/galeria", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": 
"18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gaestebuch", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gadgets", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gadget", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} diff --git a/data/processed-test/new_log.jsonl b/data/processed-test/new_log.jsonl new file mode 100644 index 0000000..90debed --- /dev/null +++ b/data/processed-test/new_log.jsonl @@ -0,0 +1,15 @@ +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "", "status": 301, "size": null, "ref": null, "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:93.0) Gecko/20100101 Firefox/91.0", "severity": "unknown", "iocs": ["5.135.75.243"], 
"rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "", "status": 301, "size": null, "ref": null, "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:93.0) Gecko/20100101 Firefox/91.0", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (iPhone; CPU iPhone OS 8_0_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A366 Safari/600.1.4", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "", "status": 301, "size": null, "ref": null, "ua": "Mozilla/5.0 (iPhone; CPU iPhone OS 8_0_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A366 Safari/600.1.4", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (iPhone; U; CPU iPhone OS 3_0 like Mac OS X; en-us) AppleWebKit/528.18 (KHTML, like Gecko) Version/4.0 Mobile/7A341 Safari/528.16", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "", "status": 301, "size": null, "ref": null, "ua": "Mozilla/5.0 (iPhone; U; CPU iPhone OS 3_0 like Mac OS X; en-us) AppleWebKit/528.18 (KHTML, like Gecko) Version/4.0 Mobile/7A341 Safari/528.16", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": 
"/sitemap.xml", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "", "status": 301, "size": null, "ref": null, "ua": "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "msnbot/1.1 (+http://search.msn.com/msnbot.htm)", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/robots.txt", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/robots.txt", "proto": "", "status": 301, "size": null, "ref": null, "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "", "status": 301, "size": null, "ref": null, "ua": "msnbot/1.1 (+http://search.msn.com/msnbot.htm)", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/robots.txt", 
"proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:93.0) Gecko/20100101 Firefox/91.0", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} diff --git a/data/sample_ips.txt b/data/sample_ips.txt new file mode 100644 index 0000000..ea1c002 --- /dev/null +++ b/data/sample_ips.txt @@ -0,0 +1,5 @@ +# One IP per line (comments allowed) +8.8.8.8 +1.1.1.1 +208.67.222.222 +185.199.108.153 diff --git a/docs/ProjectMindmapv0.5.png b/docs/ProjectMindmapv0.5.png new file mode 100644 index 0000000..f7e89e1 Binary files /dev/null and b/docs/ProjectMindmapv0.5.png differ diff --git a/docs/USAGE.md b/docs/USAGE.md index 0eee43f..bfc9bea 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -5,19 +5,34 @@ Usage - Basic run (log -> enriched events + reports): - `python -m src.cli data/raw/access_log.txt --out data/processed --summary --preview 3` - Adds `data/processed/access_log.jsonl` and `data/processed/reports/report.txt|md`. + - Any `.log` file is treated as a log. `.txt` files are auto-detected: if they contain recognizable log lines, they are parsed as logs; otherwise they are copied as plain text. Example: `python -m src.cli data/raw/new_log.txt --out data/processed`. - Options: + - `--verbose quiet|normal|max`: control console verbosity (default: `max`). - `--no-llm`: disable LLM enrichment (default if no GROQ keys). - `--no-cti`: disable CTI lookups (scraping/API); runs offline. - `--no-reports`: skip building reports. - `--limit N`: process only the first N lines for quick tests. - `--format jsonl|csv`: choose output for enriched events. - `--color auto|always|never`: terminal color policy. + - `--llm-group-by none|ip|signature`: group records before LLM calls to reduce requests. `ip` groups by source IP (minimal requests). `signature` groups by `ip+path+status+ua`. 
+ - `--llm-sample N`: only send N groups to the LLM; non-selected groups are annotated as `severity=unknown` with rationale `LLM sampled out`. + - `--llm-gate-4xx N`: only send groups with at least N 4xx responses. + - `--llm-gate-ua`: only send groups with suspicious user-agents. + - `--group-window SECONDS`: add a time window bucket to grouping to compress bursts (e.g., `60`). + - `--cti-scope suspicious|all`: look up CTI for only suspicious IPs (based on 4xx and UA) or all IPs. + - `--cti-max N`: maximum number of IPs to query for CTI (0=unlimited). + - `--cti-batch-size N` and `--cti-batch-pause S`: periodically flush cache and pause S seconds between CTI batches. Environment - Copy `.env.example` to `.env` and set: - `GROQ_API_KEYS` for LLM enrichment (comma-separated supported). - `GROQ_MODEL` if you want to change the default. + - Optional CTI provider keys: + - `VT_API_KEY` (VirusTotal IP lookups) + - `OTX_API_KEY` (AlienVault OTX pulses) + - `GREYNOISE_API_KEY` (GreyNoise community/enterprise) + - `IPINFO_TOKEN` (org/geo enrichment) Testing @@ -26,6 +41,22 @@ Testing Notes -- CTI lookups use AbuseIPDB public site scraping as a baseline. In offline or restricted environments, the tool continues without CTI data. +- CTI lookups include AbuseIPDB/Talos/VirusTotal by default, and will also use OTX, GreyNoise, ThreatFox, and IPInfo when keys/network are available. In offline or restricted environments, the tool continues without CTI data. - Reports summarize overall activity, surface suspicious IPs (CTI risk, 4xx rate, UA flags), and include an optional brief AI anomaly insight when LLM is enabled. +Performance tips + +- To avoid rate limits on large logs, prefer `--llm-group-by ip --group-window 60 --llm-gate-4xx 5 --llm-sample 200 --cti-scope suspicious --cti-max 200`. +- For fully offline, fastest runs use `--no-llm --no-cti --no-reports`. + +Environment variables + +- `GROQ_TOKENS_BUDGET`: approximate daily token budget for LLM calls. 
When reached, enrichment gracefully degrades and continues offline. +- `OFFLINE_IP_BLOCKLIST`: path to a newline-separated list of IPs to treat as high risk without CTI calls. + - Token accounting uses model‑reported usage when available; otherwise a conservative estimate. + +Dashboard + +- Install UI deps: `pip install -r requirements.txt` +- Run: `streamlit run ui/app.py` +- Select the latest file in `data/processed/` and keep the auto-refresh enabled for near real-time updates while the CLI processes logs. diff --git a/docs/wiki/CLI.md b/docs/wiki/CLI.md new file mode 100644 index 0000000..eecd966 --- /dev/null +++ b/docs/wiki/CLI.md @@ -0,0 +1,26 @@ +# CLI Reference + +`python -m src.cli <input> --out <out_dir> [options]` + +Core +- `--format {jsonl,csv}` +- `--summary`, `--preview N` +- `--no-llm`, `--no-cti`, `--no-reports` +- `--limit N`, `--color {auto,always,never}` + +LLM +- `--llm-group-by {none,ip,signature}` +- `--group-window SECONDS` +- `--llm-sample N`, `--llm-gate-4xx N`, `--llm-gate-ua` + +CTI +- `--cti-scope {suspicious,all}` +- `--cti-max N`, `--cti-batch-size N`, `--cti-batch-pause S` + +Examples +```bash +python -m src.cli data/raw/access.log --out data/processed --summary --preview 5 +python -m src.cli data/raw/big.log --out data/processed \ + --llm-group-by signature --llm-sample 100 --cti-max 100 --summary +``` + diff --git a/docs/wiki/CTI_and_LLM_Strategy.md b/docs/wiki/CTI_and_LLM_Strategy.md new file mode 100644 index 0000000..04add31 --- /dev/null +++ b/docs/wiki/CTI_and_LLM_Strategy.md @@ -0,0 +1,20 @@ +# CTI + LLM Strategy + +Principles +- Offline‑first: deterministic outputs without network +- Group then sample to minimize LLM calls +- Gates on 4xx and suspicious UA +- Strong CTI cache; batch + pause for resilience +- Budget throttle via `GROQ_TOKENS_BUDGET` + +LLM +- Grouping: `ip` or `signature` (ip+path+status+ua) +- Sampling: `--llm-sample N` (default 200) +- Gates: `--llm-gate-4xx N`, `--llm-gate-ua` + +CTI +- Scope: `--cti-scope suspicious` 
(default) or `all` +- Caps: `--cti-max`, batching and pause +- Cache: `data/cache/cti_cache.json` reused across runs +- VT/API: defer to shortlist; fail soft when rate‑limited + diff --git a/docs/wiki/Development.md b/docs/wiki/Development.md new file mode 100644 index 0000000..abfd410 --- /dev/null +++ b/docs/wiki/Development.md @@ -0,0 +1,19 @@ +# Development + +Structure +- `src/`: parsers, enrichers (LLM/CTI), CLI +- `tests/`: pytest suite (80%+ target), fixtures under `tests/fixtures/` +- `docs/`: usage and diagrams +- `notebooks/`: exploratory analysis (clear outputs) +- `data/raw/`: raw logs (keep large datasets out of git) + +Commands +- Tests: `pytest -q` or `pytest --cov=src -q` +- Lint/format: `ruff check . && ruff format .` +- Types: `mypy src` + +Contributing +- Conventional Commits; small PRs preferred (~≤300 LOC) +- No secrets: use `.env`, provide `.env.example` +- Add fixtures for new parsers; mock network in tests + diff --git a/docs/wiki/FAQ.md b/docs/wiki/FAQ.md new file mode 100644 index 0000000..05f33d7 --- /dev/null +++ b/docs/wiki/FAQ.md @@ -0,0 +1,14 @@ +# FAQ + +Q: Can I run fully offline? +A: Yes — use `--no-llm --no-cti`. Reports remain reproducible; severity is marked `unknown` with rationale. + +Q: How to avoid rate limits? +A: Use `--cti-max`, batching flags, and rely on the cache. Prefer grouping + sampling for LLM. + +Q: Why are some groups missing LLM notes? +A: They were gated/sampled out or the budget was reached. + +Q: Where are results stored? +A: `data/processed/` and `data/processed/reports/`. + diff --git a/docs/wiki/Home.md b/docs/wiki/Home.md new file mode 100644 index 0000000..d2bde44 --- /dev/null +++ b/docs/wiki/Home.md @@ -0,0 +1,17 @@ +# LogCTIAI Wiki + +Welcome to the LogCTIAI wiki. This project analyzes large web/server logs, enriches them with optional LLM reasoning, and performs efficient CTI lookups — all designed to run offline‑first and on a budget. 
+ +- Quickstart: installation and first run +- CLI: all flags and examples +- CTI + LLM Strategy: grouping, sampling, gates, cache, budgets +- Development: repo structure, testing, style +- FAQ: common questions and troubleshooting + +![Mindmap](../ProjectMindmapv0.5.png) + +Useful links: +- README (repo root) +- Usage guide: docs/USAGE.md +- Streamlit UI: ui/ + diff --git a/docs/wiki/Quickstart.md b/docs/wiki/Quickstart.md new file mode 100644 index 0000000..6dca8b9 --- /dev/null +++ b/docs/wiki/Quickstart.md @@ -0,0 +1,18 @@ +# Quickstart + +Create env +- `python -m venv .venv && source .venv/bin/activate` +- `pip install -r requirements.txt` + +Offline run +- `python -m src.cli data/raw/big.log --out data/processed --no-llm --no-cti --no-reports` + +Budgeted run +- `export GROQ_TOKENS_BUDGET=150000` +- `python -m src.cli data/raw/big.log --out data/processed --llm-group-by ip --llm-sample 200 --cti-scope suspicious --cti-max 200 --color never` + +Outputs +- `data/processed/*.jsonl|csv` +- `data/processed/reports/*.{txt,md}` +- `data/cache/cti_cache.json` + diff --git a/requirements.txt b/requirements.txt index 5ef8a99..4c82835 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,11 @@ +requests[socks]>=2.31.0 python-dotenv>=1.0.1 -groq>=0.9.0 -httpx>=0.27.0 -PyPDF2>=3.0.1 -beautifulsoup4>=4.12.3 -mypy-extensions>=1.0.0 -types-requests; python_version>='3.8' -types-python-dateutil; python_version>='3.8' -pytest>=8.2.0 -pytest-cov>=5.0.0 +fpdf2>=2.7.8 rich>=13.7.1 -uvloop; platform_system != 'Windows' -markdown>=3.6 +ipaddress; python_version<'3.3' +ruff>=0.5.0 +pytest>=8.2.0 +mypy>=1.10.0 +streamlit>=1.36.0 +pandas>=2.2.2 +groq>=0.11.0 diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..dd58ce5 --- /dev/null +++ b/run.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +set -euo pipefail + +cmd=${1:-help} + +case "$cmd" in + setup) + python -m venv .venv + source .venv/bin/activate + pip install -r requirements.txt + ;; + ui) + source 
.venv/bin/activate + streamlit run src/ui/streamlit_app.py + ;; + scan) + source .venv/bin/activate + python -m src.cli scan-ips "${2:-data/sample_ips.txt}" --out data/processed + ;; + scan-all) + source .venv/bin/activate + python -m src.cli scan-ips "${2:-data/sample_ips.txt}" --out data/processed --cti-max -1 --cti-rate 0.8 --cti-burst 1 --save-every 25 + ;; + doctor) + make doctor || true + ;; + *) + echo "Usage: ./run.sh [setup|ui|scan|scan-all|doctor] [file]" ;; +esac + diff --git a/scripts/publish_wiki.sh b/scripts/publish_wiki.sh new file mode 100755 index 0000000..2072b31 --- /dev/null +++ b/scripts/publish_wiki.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +set -euo pipefail + +REPO_URL=$(git remote get-url origin) +OWNER_REPO=${REPO_URL#https://github.com/} +OWNER_REPO=${OWNER_REPO%.git} + +WIKI_URL="https://github.com/${OWNER_REPO}.wiki.git" +WORKDIR=$(mktemp -d) +trap 'rm -rf "$WORKDIR"' EXIT + +echo "Cloning wiki: $WIKI_URL" +if ! git clone "$WIKI_URL" "$WORKDIR"; then + echo "Error: Wiki repository not found. Ensure the repo's Wiki is enabled and that you have push access." >&2 + echo "You can enable Wiki with: gh repo edit ${OWNER_REPO} --enable-wiki" >&2 + exit 1 +fi + +rsync -a --delete docs/wiki/ "$WORKDIR"/ +cd "$WORKDIR" +git add . +if git diff --cached --quiet; then + echo "No wiki changes to publish." + exit 0 +fi +git commit -m "wiki: sync from docs/wiki" +git push origin HEAD +echo "Wiki published: https://github.com/${OWNER_REPO}/wiki" + diff --git a/scripts/run_ui.sh b/scripts/run_ui.sh new file mode 100755 index 0000000..54af48a --- /dev/null +++ b/scripts/run_ui.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [ -d .venv ]; then + . 
.venv/bin/activate +fi + +exec streamlit run ui/app.py + diff --git a/src/__init__.py b/src/__init__.py index fe16459..63023b0 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -1,2 +1,2 @@ -__all__ = [] +"""LogCTIAI source package.""" diff --git a/src/cache/__init__.py b/src/cache/__init__.py new file mode 100644 index 0000000..d16980d --- /dev/null +++ b/src/cache/__init__.py @@ -0,0 +1,2 @@ +"""Cache utilities.""" + diff --git a/src/cache/cti_cache.py b/src/cache/cti_cache.py new file mode 100644 index 0000000..f735962 --- /dev/null +++ b/src/cache/cti_cache.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +import json +import os +from pathlib import Path +from typing import Any, Dict, Optional + + +DEFAULT_CACHE = Path("data/cache/cti_cache.json") + + +def _ensure_parent(path: Path) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + + +def load_cache(path: Path = DEFAULT_CACHE) -> Dict[str, Any]: + if not path.exists(): + return {} + try: + with path.open("r", encoding="utf-8") as f: + return json.load(f) + except Exception: + return {} + + +def save_cache(cache: Dict[str, Any], path: Path = DEFAULT_CACHE) -> None: + _ensure_parent(path) + with path.open("w", encoding="utf-8") as f: + json.dump(cache, f, ensure_ascii=False, indent=2) + + +def get_cached(cache: Dict[str, Any], key: str) -> Optional[Dict[str, Any]]: + val = cache.get(key) + if isinstance(val, dict): + return val + return None + + +def set_cached(cache: Dict[str, Any], key: str, value: Dict[str, Any]) -> None: + cache[key] = value + diff --git a/src/cli.py b/src/cli.py index f6b94e1..e4dd2e5 100644 --- a/src/cli.py +++ b/src/cli.py @@ -16,16 +16,23 @@ from rich.traceback import install as rich_traceback_install from .parsers.text_extractor import extract_text_from_pdf, read_text_file -from .parsers.log_parser import parse_lines +from .parsers.log_parser import parse_lines, parse_line from .enrichers.llm_enricher import enrich_log_records from .enrichers.cti_service import 
cti_for_ips from .parsers.ua_analysis import detect_suspicious_user_agent -from .reports.report_builder import build_text_report, build_markdown_report +from .reports.report_builder import ( + build_text_report, + build_markdown_report, + build_malicious_ai_report, +) from .config import get_settings +from .groq_client import GroqRotatingClient rich_traceback_install(show_locals=False) console = Console() +# Verbosity level for CLI logs. One of: "quiet", "normal", "max". +_VERBOSITY = "max" def process_log( @@ -36,15 +43,40 @@ def process_log( out_format: str = "jsonl", with_cti: bool = True, build_reports: bool = True, + *, + llm_sample: int | None = None, + llm_group_by: list[str] | None = None, + group_window_sec: int | None = None, + llm_gate_min_4xx: int | None = None, + llm_gate_ua: bool = False, + cti_scope: str = "suspicious", + cti_max: int | None = None, + cti_batch_size: int | None = None, + cti_batch_pause: float = 0.0, + ai_malicious_report: bool = False, ) -> Path: - console.rule("[bold cyan]🔎 Parsing Log") - console.log(f"Parsing log: [bold]{path}") + if _VERBOSITY != "quiet": + console.rule("[bold cyan]🔎 Parsing Log") + console.log(f"Parsing log: [bold]{path}") lines = path.read_text(encoding="utf-8", errors="ignore").splitlines() if limit is not None: lines = lines[:limit] records = [r.to_dict() for r in parse_lines(lines)] - console.log(f"Parsed [bold green]{len(records)}[/] records") - enriched = enrich_log_records(records, use_llm=use_llm) + if _VERBOSITY == "max": + console.log(f"Parsed [bold green]{len(records)}[/] records") + if _VERBOSITY == "max": + console.log("Starting enrichment with current LLM/filters...") + enriched = enrich_log_records( + records, + use_llm=use_llm, + llm_sample=llm_sample, + group_by=llm_group_by, + group_window_sec=group_window_sec, + llm_gate_min_4xx=llm_gate_min_4xx, + llm_gate_ua=llm_gate_ua, + ) + if _VERBOSITY == "max": + console.log(f"Enrichment done; writing outputs to [bold]{out_dir}[/].") 
out_dir.mkdir(parents=True, exist_ok=True) if out_format == "csv": out_path = out_dir / f"{path.stem}.csv" @@ -61,11 +93,16 @@ def process_log( f.write(json.dumps(r, ensure_ascii=False) + "\n") # If requested, compute CTI + stats and build reports if build_reports: - console.rule("[bold blue]🧠 Stats + CTI + Reports") + if _VERBOSITY != "quiet": + console.rule("[bold blue]🧠 Stats + CTI + Reports") overall_stats, suspicious_rows, ai_insight = summarize_and_cti( enriched_records=enriched, use_llm=use_llm, with_cti=with_cti, + cti_scope=cti_scope, + cti_max=cti_max, + cti_batch_size=cti_batch_size, + cti_batch_pause=cti_batch_pause, ) reports_dir = out_dir / "reports" txt_path = build_text_report( @@ -80,7 +117,67 @@ def process_log( overall_stats=overall_stats, ai_insight=ai_insight, ) - console.log(f"Reports saved: [bold]{txt_path}[/], [bold]{md_path}[/]") + if _VERBOSITY != "quiet": + console.log(f"Reports saved: [bold]{txt_path}[/], [bold]{md_path}[/]") + + # Optional: generate a detailed malicious activity report using LLM + if ai_malicious_report and use_llm and suspicious_rows: + try: + if _VERBOSITY != "quiet": + console.log("Generating AI-written malicious activity report for strongest indicators...") + + # Select IPs with strongest malicious indicators + def is_malicious(row: dict[str, object]) -> bool: + risk = str(row.get("risk", "unknown")).lower() + talos = str(row.get("talos_reputation", "")).lower() + vt_mal = int(row.get("vt_malicious") or 0) + vt_susp = int(row.get("vt_suspicious") or 0) + return ( + risk in {"high"} + or talos in {"untrusted", "malicious"} + or vt_mal >= 1 + or vt_susp >= 3 + ) + + malicious = [r for r in suspicious_rows if is_malicious(r)] + if malicious: + # Derive minimal per-IP context from enriched events (top paths/UA) + from collections import Counter as _C + per_ip_paths: dict[str, list[tuple[str, int]]] = {} + per_ip_ua: dict[str, str] = {} + for ip in {str(r.get("ip")) for r in malicious}: + paths = _C([str(e.get("path")) 
for e in enriched if str(e.get("ip")) == ip and e.get("path")]) + per_ip_paths[ip] = paths.most_common(5) + # pick any UA string observed + for e in enriched: + if str(e.get("ip")) == ip and (e.get("ua") or e.get("user_agent")): + per_ip_ua[ip] = str(e.get("ua") or e.get("user_agent")) + break + # Build prompt + insight_req = { + "malicious": malicious[:20], # cap to keep prompt small + "per_ip_top_paths": per_ip_paths, + "per_ip_ua": per_ip_ua, + } + client = GroqRotatingClient() + content = client.chat([ + { + "role": "system", + "content": ( + "You are a senior SOC analyst. Draft a concise but detailed incident note summarizing malicious " + "activity detected in logs corroborated by CTI (AbuseIPDB, Talos, VirusTotal). " + "Include: IP(s), CTI signals, notable paths, suspected TTPs, and recommended actions (blocking, WAF rules, triage). " + "Use clear sections and bullets." + ), + }, + {"role": "user", "content": json.dumps(insight_req)}, + ]) + rpt_txt, rpt_md = build_malicious_ai_report(reports_dir, content) + console.log(f"Malicious AI report saved: [bold]{rpt_txt}[/], [bold]{rpt_md}[/]") + else: + console.log("[dim]No strong malicious CTI signals; skipping detailed AI report.") + except Exception as e: # pragma: no cover - network/env specific + console.log(f"[dim]Malicious AI report unavailable: {e}") return out_path @@ -89,6 +186,11 @@ def summarize_and_cti( enriched_records: list[dict[str, object]], use_llm: bool, with_cti: bool = True, + *, + cti_scope: str = "suspicious", # 'suspicious' | 'all' + cti_max: int | None = None, + cti_batch_size: int | None = None, + cti_batch_pause: float = 0.0, ) -> tuple[dict[str, object], list[dict[str, object]], str | None]: """Compute overall stats, annotate suspicious IPs with CTI + UA, and optional AI note. 
@@ -108,6 +210,7 @@ def summarize_and_cti( } # Per-IP stats + settings = get_settings() per_ip = defaultdict(lambda: {"requests": 0, "errors_4xx": 0, "ua_suspicious": False}) for r in enriched_records: ip = str(r.get("ip") or "") @@ -120,14 +223,38 @@ def summarize_and_cti( status = 0 if 400 <= status < 500: per_ip[ip]["errors_4xx"] += 1 - ua_susp, _ = detect_suspicious_user_agent(str(r.get("ua") or r.get("user_agent") or "")) + ua_susp, _ = detect_suspicious_user_agent( + str(r.get("ua") or r.get("user_agent") or ""), + patterns=settings.suspicious_ua_patterns or None, + ) per_ip[ip]["ua_suspicious"] = per_ip[ip]["ua_suspicious"] or ua_susp # CTI lookup cti_map: dict[str, dict[str, object]] = {} if with_cti: try: - cti_results = cti_for_ips(per_ip.keys()) + # Decide candidate IPs to look up: prefer suspicious or top 4xx + if cti_scope == "all": + candidates = list(per_ip.keys()) + else: + candidates = [ + ip + for ip, stats in per_ip.items() + if (stats["errors_4xx"] >= settings.risk_4xx_threshold) or stats["ua_suspicious"] + ] + # Sort by 4xx desc then requests desc + candidates.sort(key=lambda i: (per_ip[i]["errors_4xx"], per_ip[i]["requests"]), reverse=True) + if cti_max is not None and cti_max >= 0: + candidates = candidates[:cti_max] + cti_results = cti_for_ips( + candidates, + virustotal_api_key=settings.virustotal_api_key, + otx_api_key=settings.otx_api_key, + greynoise_api_key=settings.greynoise_api_key, + ipinfo_token=settings.ipinfo_token, + batch_size=cti_batch_size, + pause_seconds=cti_batch_pause, + ) cti_map = {ip: v.to_dict() for ip, v in cti_results.items()} except Exception as e: # pragma: no cover - network / env specific console.log(f"[dim]CTI lookup failed: {e}. 
Continuing without CTI.") @@ -135,10 +262,27 @@ def summarize_and_cti( # Build suspicious rows suspicious_rows: list[dict[str, object]] = [] + # Load offline blocklist if provided + offline_blocked: set[str] = set() + if settings.offline_ip_blocklist: + try: + from pathlib import Path as _P + p = _P(settings.offline_ip_blocklist) + if p.exists(): + offline_blocked = {line.strip() for line in p.read_text(encoding="utf-8", errors="ignore").splitlines() if line.strip() and not line.strip().startswith('#')} + except Exception: + offline_blocked = set() for ip, stats in per_ip.items(): cti = cti_map.get(ip, {}) risk = str(cti.get("risk", "unknown")) - is_susp = risk in {"high", "medium"} or stats["errors_4xx"] >= 5 or stats["ua_suspicious"] + # Offline blocklist escalation + if ip in offline_blocked and risk != "high": + risk = "high" + is_susp = ( + risk in {"high", "medium"} + or stats["errors_4xx"] >= settings.risk_4xx_threshold + or stats["ua_suspicious"] + ) if not is_susp: continue row = { @@ -148,6 +292,14 @@ def summarize_and_cti( "total_reports": cti.get("total_reports"), "country": cti.get("country"), "url": cti.get("url"), + "talos_reputation": cti.get("talos_reputation"), + "talos_owner": cti.get("talos_owner"), + "vt_malicious": cti.get("vt_malicious"), + "vt_suspicious": cti.get("vt_suspicious"), + "otx_pulse_count": cti.get("otx_pulse_count"), + "greynoise_classification": cti.get("greynoise_classification"), + "threatfox_matches": cti.get("threatfox_matches"), + "ipinfo_org": cti.get("ipinfo_org"), **stats, } # One-line AI note from existing enrichment (if any) @@ -164,8 +316,6 @@ def summarize_and_cti( ai_insight: str | None = None if use_llm: try: - from .groq_client import GroqRotatingClient - client = GroqRotatingClient() insight_req = { "total_requests": total_requests, @@ -194,8 +344,6 @@ def process_pdf(path: Path, out_dir: Path, use_llm: bool) -> Path: out_path.write_text(text, encoding="utf-8") # Optional: one-shot summary with LLM if use_llm 
and text.strip(): - from .groq_client import GroqRotatingClient - client = GroqRotatingClient() summary = client.chat([ {"role": "system", "content": "Summarize the key findings in 5 bullets."}, @@ -257,7 +405,7 @@ def _preview_records(records: List[Dict[str, object]], n: int) -> None: "high": "bold red", "medium": "yellow", "low": "green", - }.get(sev, "cyan") + }.get(sev, "grey50") console.print(Panel( RichJSON.from_data(r, indent=2), title=f"Severity: [{sev_color}]{sev}[/]", @@ -270,6 +418,7 @@ def main(argv: List[str] | None = None) -> int: parser = argparse.ArgumentParser(description="Colorful Log + CTI pipeline with Groq enrichment") parser.add_argument("input", type=str, help="Path to input file (log, pdf, txt)") parser.add_argument("--out", type=str, default="data/processed", help="Output directory") + parser.add_argument("--verbose", choices=["quiet", "normal", "max"], default="max", help="Verbosity level for logs") parser.add_argument("--no-llm", action="store_true", help="Disable LLM enrichment") parser.add_argument("--limit", type=int, default=None, help="Limit records for quick tests") parser.add_argument("--summary", action="store_true", help="Print colorful summary in terminal") @@ -277,16 +426,36 @@ def main(argv: List[str] | None = None) -> int: parser.add_argument("--format", choices=["jsonl", "csv"], default="jsonl", help="Output format for logs") parser.add_argument("--no-cti", action="store_true", help="Disable CTI lookups") parser.add_argument("--no-reports", action="store_true", help="Do not build text/markdown reports") + parser.add_argument("--ai-malicious-report", action="store_true", help="Generate detailed AI report for malicious CTI signals") parser.add_argument("--color", choices=["auto", "always", "never"], default="auto", help="Terminal color policy") + # LLM request controls + parser.add_argument("--llm-sample", type=int, default=200, help="Limit LLM calls by sampling this many groups (0=all)") + parser.add_argument( + 
"--llm-group-by", + choices=["none", "ip", "signature"], + default="ip", + help="Group records before enrichment to reduce LLM calls: 'ip' (minimal), 'signature' (ip+path+status+ua), or 'none'", + ) + parser.add_argument("--group-window", type=int, default=0, help="Optional time window (seconds) to include in grouping key") + parser.add_argument("--llm-gate-4xx", type=int, default=0, help="Only send groups with at least this many 4xx to the LLM (0=disabled)") + parser.add_argument("--llm-gate-ua", action="store_true", help="Only send groups with suspicious UA patterns to the LLM") + # CTI request controls + parser.add_argument("--cti-scope", choices=["suspicious", "all"], default="suspicious", help="Which IPs to look up for CTI") + parser.add_argument("--cti-max", type=int, default=100, help="Max CTI lookups (0=unlimited)") + parser.add_argument("--cti-batch-size", type=int, default=0, help="Batch size for CTI lookups (0=disabled)") + parser.add_argument("--cti-batch-pause", type=float, default=0.0, help="Pause seconds between CTI batches") args = parser.parse_args(argv) - # Configure console color policy + # Configure console color policy and verbosity global console + global _VERBOSITY + _VERBOSITY = str(getattr(args, "verbose", "max")) force_term = args.color == "always" no_color = args.color == "never" console = Console(force_terminal=force_term, no_color=no_color) - console.print(Rule(title="[bold cyan]🧭 Log + CTI Pipeline")) + if _VERBOSITY != "quiet": + console.print(Rule(title="[bold cyan]🧭 Log + CTI Pipeline")) use_llm = not args.no_llm and bool(settings.groq_api_keys) if not use_llm: @@ -300,7 +469,32 @@ def main(argv: List[str] | None = None) -> int: out_path: Path enriched_records: List[Dict[str, object]] | None = None - if suffix in {".log", ".txt"} and path.name.startswith("access_log"): + # Heuristic: treat .log as logs; for .txt, auto-detect by trying to parse a few lines + def _looks_like_log_file(p: Path, sample_lines: int = 200) -> bool: + try: + 
text = p.read_text(encoding="utf-8", errors="ignore") + except Exception: + return False + lines = text.splitlines()[:sample_lines] + parsed = 0 + for ln in lines: + if parse_line(ln): + parsed += 1 + # One hit is enough to call it a log + break + return parsed > 0 + + if suffix == ".log" or (suffix == ".txt" and _looks_like_log_file(path)): + # Compute grouping config for LLM + gb = None + if args.llm_group_by == "ip": + gb = ["ip"] + elif args.llm_group_by == "signature": + gb = ["ip", "path", "status", "ua"] + # Normalize sample value + sample = None if args.llm_sample in (None, 0) else max(0, int(args.llm_sample)) + group_window = None if args.group_window in (None, 0) else max(1, int(args.group_window)) + gate4xx = None if args.llm_gate_4xx in (None, 0) else max(1, int(args.llm_gate_4xx)) out_path = process_log( path, out_dir, @@ -309,19 +503,19 @@ def main(argv: List[str] | None = None) -> int: out_format=args.format, with_cti=not args.no_cti, build_reports=not args.no_reports, + llm_sample=sample, + llm_group_by=gb, + group_window_sec=group_window, + llm_gate_min_4xx=gate4xx, + llm_gate_ua=bool(args.llm_gate_ua), + cti_scope=args.cti_scope, + cti_max=(None if args.cti_max in (None, 0) else max(0, int(args.cti_max))), + cti_batch_size=(None if getattr(args, 'cti_batch_size', 0) in (None, 0) else max(1, int(args.cti_batch_size))), + cti_batch_pause=float(getattr(args, 'cti_batch_pause', 0.0) or 0.0), + ai_malicious_report=bool(args.ai_malicious_report), ) # Load enriched to drive summary/preview enriched_records = [json.loads(l) for l in (out_dir / f"{path.stem}.jsonl").read_text(encoding="utf-8").splitlines()] if args.format == "jsonl" else None - elif suffix == ".log": - out_path = process_log( - path, - out_dir, - use_llm=use_llm, - limit=args.limit, - out_format=args.format, - with_cti=not args.no_cti, - build_reports=not args.no_reports, - ) elif suffix == ".pdf": out_path = process_pdf(path, out_dir, use_llm=use_llm) elif suffix == ".txt": diff --git 
a/src/cli/__init__.py b/src/cli/__init__.py new file mode 100644 index 0000000..3c22d42 --- /dev/null +++ b/src/cli/__init__.py @@ -0,0 +1,19 @@ +"""CLI package for running scans. + +Exports legacy helpers used by tests and external callers by re-exporting +from the historical single-file CLI module `src/cli.py`. +""" + +from ..cli import ( # type: ignore[F401] + process_log, + summarize_and_cti, + _print_summary, + _preview_records, +) + +__all__ = [ + "process_log", + "summarize_and_cti", + "_print_summary", + "_preview_records", +] diff --git a/src/cli/__main__.py b/src/cli/__main__.py new file mode 100644 index 0000000..5a5f075 --- /dev/null +++ b/src/cli/__main__.py @@ -0,0 +1,144 @@ +from __future__ import annotations + +import argparse +import ipaddress +import os +from pathlib import Path +from typing import List, Optional, Set + +from dotenv import load_dotenv +from rich import print +from rich.progress import Progress + +from src.core.scanner import ScanOptions, scan_ips_enrich +from src.report.pdf_report import PDFReport +import csv +import json + + +def _read_ips_from_file(path: Path) -> List[str]: + ips: List[str] = [] + with path.open("r", encoding="utf-8") as f: + for line in f: + s = line.strip() + if not s or s.startswith("#"): + continue + try: + ipaddress.ip_address(s) + ips.append(s) + except ValueError: + continue + return ips + + +def _load_offline_blocklist() -> Set[str]: + path = os.getenv("OFFLINE_IP_BLOCKLIST") + bad: Set[str] = set() + if path and Path(path).exists(): + bad.update(_read_ips_from_file(Path(path))) + return bad + + +def cmd_scan_ips(args: argparse.Namespace) -> int: + load_dotenv() + out: Path = Path(args.out) + out.mkdir(parents=True, exist_ok=True) + + input_path = Path(args.input_path) + if not input_path.exists(): + print(f"[red]Input file not found:[/red] {input_path}") + return 2 + + ips = _read_ips_from_file(input_path) + print(f"[bold]Loaded[/bold] {len(ips)} IPs, {len(list(dict.fromkeys(ips)))} unique.") + + opts = 
ScanOptions( + cti_max=args.cti_max, + use_cache=not args.no_cache, + no_cti=args.no_cti, + offline_blocklist=_load_offline_blocklist(), + cti_rate_per_sec=args.cti_rate, + cti_burst=args.cti_burst, + save_every=args.save_every, + abuseipdb=(not getattr(args, "no_abuseipdb", False)), + abuseipdb_threshold=getattr(args, "abuse_threshold", 50), + abuseipdb_rate_per_sec=getattr(args, "abuse_rate", 0.8), + abuseipdb_burst=getattr(args, "abuse_burst", 1), + workers=getattr(args, "workers", 4), + ) + + with Progress() as progress: + task = progress.add_task("Scanning", total=len(ips)) + rows, summary, errors = scan_ips_enrich( + ips, + opts, + on_progress=lambda i, t: progress.update(task, completed=i, total=t), + ) + + if not args.include_suspicious: + rows = [r for r in rows if r["classification"] == "malicious"] + + pdf = PDFReport() + blob = pdf.build(malicious_rows=rows, summary=summary) + + out_pdf = out / "ip_threat_report.pdf" + with out_pdf.open("wb") as f: + f.write(blob) + print(f"[green]Report written:[/green] {out_pdf}") + # Write CSV/JSON for machine use + out_json = out / "ip_threat_report.json" + with out_json.open("w", encoding="utf-8") as f: + json.dump({ + "summary": summary, + "rows": rows, + }, f, ensure_ascii=False, indent=2) + out_csv = out / "ip_threat_report.csv" + if rows: + with out_csv.open("w", encoding="utf-8", newline="") as f: + writer = csv.DictWriter(f, fieldnames=list(rows[0].keys())) + writer.writeheader() + writer.writerows(rows) + print(f"[green]Also wrote:[/green] {out_json}, {out_csv}") + if errors: + print(f"[yellow]{len(errors)} errors occurred. 
See log above or rerun with fewer IPs.[/yellow]") + return 0 + + +def main() -> int: + parser = argparse.ArgumentParser( + prog="logctiai", + description="Scan IPs with CTI and generate PDF report.", + ) + sub = parser.add_subparsers(dest="command") + + sp = sub.add_parser("scan-ips", help="Scan IPs from a file and output a PDF report") + sp.add_argument("input_path", type=str, help="Path to text file with one IP per line") + sp.add_argument("--out", type=str, default="data/processed", help="Output directory") + sp.add_argument( + "--cti-max", + type=int, + default=200, + help="Max CTI lookups (deduped). Use -1 for all IPs", + ) + sp.add_argument("--no-cache", action="store_true", help="Do not use or update the cache") + sp.add_argument("--include-suspicious", action="store_true", help="Include suspicious in report") + sp.add_argument("--no-cti", action="store_true", help="Disable CTI calls (offline)") + sp.add_argument("--cti-rate", type=float, default=1.0, help="CTI requests per second") + sp.add_argument("--cti-burst", type=int, default=1, help="CTI burst size") + sp.add_argument("--save-every", type=int, default=50, help="Save cache every N updates") + sp.add_argument("--no-abuseipdb", action="store_true", help="Disable AbuseIPDB lookups (if not desired)") + sp.add_argument("--abuse-threshold", type=int, default=50, help="AbuseIPDB confidence threshold for malicious") + sp.add_argument("--abuse-rate", type=float, default=0.8, help="AbuseIPDB requests per second") + sp.add_argument("--abuse-burst", type=int, default=1, help="AbuseIPDB burst size") + sp.add_argument("--workers", type=int, default=4, help="Parallel workers for CTI (limit-safe)") + sp.set_defaults(func=cmd_scan_ips) + + args = parser.parse_args() + if not hasattr(args, "func"): + parser.print_help() + return 1 + return int(args.func(args)) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/config.py b/src/config.py index 87280c8..751113a 100644 --- a/src/config.py +++ 
b/src/config.py @@ -3,7 +3,7 @@ import os import random from dataclasses import dataclass -from typing import List +from typing import List, Optional from dotenv import load_dotenv @@ -12,6 +12,13 @@ class Settings: groq_api_keys: List[str] groq_model: str + risk_4xx_threshold: int + suspicious_ua_patterns: List[str] + virustotal_api_key: Optional[str] + otx_api_key: Optional[str] + greynoise_api_key: Optional[str] + ipinfo_token: Optional[str] + offline_ip_blocklist: Optional[str] def get_settings() -> Settings: @@ -21,5 +28,26 @@ def get_settings() -> Settings: # Shuffle order to distribute load if multiple keys are provided random.shuffle(keys) model = os.getenv("GROQ_MODEL", "llama3-8b-8192") - return Settings(groq_api_keys=keys, groq_model=model) - + # Risk/UA configuration + try: + risk_4xx_threshold = int(os.getenv("RISK_4XX_THRESHOLD", "5")) + except ValueError: + risk_4xx_threshold = 5 + ua_raw = os.getenv("SUSPICIOUS_UA_REGEX", "").strip() + ua_patterns: List[str] = [p.strip() for p in ua_raw.split(",") if p.strip()] + vt_key = os.getenv("VT_API_KEY") or None + otx_key = os.getenv("OTX_API_KEY") or None + gn_key = os.getenv("GREYNOISE_API_KEY") or None + ipinfo = os.getenv("IPINFO_TOKEN") or None + offline_blocklist = os.getenv("OFFLINE_IP_BLOCKLIST") or None + return Settings( + groq_api_keys=keys, + groq_model=model, + risk_4xx_threshold=risk_4xx_threshold, + suspicious_ua_patterns=ua_patterns, + virustotal_api_key=vt_key, + otx_api_key=otx_key, + greynoise_api_key=gn_key, + ipinfo_token=ipinfo, + offline_ip_blocklist=offline_blocklist, + ) diff --git a/src/core/scanner.py b/src/core/scanner.py new file mode 100644 index 0000000..3eb3858 --- /dev/null +++ b/src/core/scanner.py @@ -0,0 +1,272 @@ +from __future__ import annotations + +import ipaddress +from dataclasses import dataclass +from concurrent.futures import ThreadPoolExecutor, as_completed +from typing import Callable, Dict, Iterable, List, Optional, Set, Tuple + +from dotenv import load_dotenv 
+ +from src.cache.cti_cache import DEFAULT_CACHE, get_cached, load_cache, save_cache, set_cached +from src.cti import VirusTotalClient, VTResult, AbuseIPDBClient, AbuseIPDBResult +from src.cti.ratelimit import RateLimitConfig + + +ProgressCb = Optional[Callable[[int, int], None]] + + +@dataclass +class ScanOptions: + cti_max: int = 200 + use_cache: bool = True + no_cti: bool = False + offline_blocklist: Optional[Set[str]] = None + cti_rate_per_sec: float = 1.0 + cti_burst: int = 1 + save_every: int = 50 # persist cache every N updates + abuseipdb: bool = True + abuseipdb_threshold: int = 50 + abuseipdb_rate_per_sec: float = 0.8 + abuseipdb_burst: int = 1 + workers: int = 1 # parallel workers for CTI; 1 = sequential + + +def parse_ips(lines: Iterable[str]) -> List[str]: + ips: List[str] = [] + for line in lines: + s = line.strip() + if not s or s.startswith("#"): + continue + try: + ipaddress.ip_address(s) + ips.append(s) + except ValueError: + continue + return list(dict.fromkeys(ips)) + + +def scan_ips_list( + ips: List[str], + options: ScanOptions, + on_progress: ProgressCb = None, +) -> Tuple[List[VTResult], Dict[str, int], List[str]]: + """Scan IP addresses via VT, honoring caching and limits. + + Returns (results, summary, errors). 
+ """ + load_dotenv() + + unique_ips = list(dict.fromkeys(ips)) + cache = load_cache(DEFAULT_CACHE) if options.use_cache else {} + offline_bad = options.offline_blocklist or set() + vt = VirusTotalClient(rate=RateLimitConfig(per_second=options.cti_rate_per_sec, burst=options.cti_burst)) + abip = AbuseIPDBClient(rate=RateLimitConfig(per_second=options.abuseipdb_rate_per_sec, burst=options.abuseipdb_burst)) + + errors: List[str] = [] + results: List[VTResult] = [] + + to_query = [ip for ip in unique_ips if ip not in offline_bad] + if not options.no_cti and options.cti_max >= 0: + to_query = to_query[: options.cti_max] + + total = len(unique_ips) + save_counter = 0 + results_by_ip: Dict[str, VTResult] = {} + + # Helper to call CTI providers for an IP + def fetch_cti(ip: str) -> Tuple[str, Optional[VTResult]]: + try: + vt_res: Optional[VTResult] = None + if not options.no_cti and ip in to_query and vt.enabled(): + vt_res = vt.fetch(ip) + return ip, vt_res + except Exception as e: + errors.append(f"{ip}: {e}") + return ip, None + + # Stage 1: immediate results for offline list and cache hits; schedule the rest + scheduled_ips: List[str] = [] + futures_map = {} + max_workers = max(1, int(options.workers)) + executor = ThreadPoolExecutor(max_workers=max_workers) if max_workers > 1 else None + + for ip in unique_ips: + try: + if ip in offline_bad: + results_by_ip[ip] = VTResult( + ip=ip, + malicious=1, + suspicious=0, + harmless=0, + undetected=0, + last_analysis_date=None, + asn=None, + as_owner="OFFLINE_BLOCKLIST", + country=None, + link=None, + ) + # progress will be updated after combining with scheduled completions + else: + cached = get_cached(cache, f"vt:{ip}") if options.use_cache else None + if cached is not None: + results_by_ip[ip] = VTResult(**cached) + elif not options.no_cti and vt.enabled() and ip in to_query and executor is not None: + fut = executor.submit(fetch_cti, ip) + futures_map[fut] = ip + scheduled_ips.append(ip) + else: + # sequential fetch or no 
CTI + vt_res = None + if not options.no_cti and vt.enabled() and ip in to_query: + vt_res = vt.fetch(ip) + if vt_res and options.use_cache: + set_cached(cache, f"vt:{ip}", vt_res.__dict__) + save_counter += 1 + if save_counter >= max(1, options.save_every): + save_cache(cache, DEFAULT_CACHE) + save_counter = 0 + results_by_ip[ip] = vt_res or VTResult( + ip=ip, + malicious=0, + suspicious=0, + harmless=0, + undetected=0, + last_analysis_date=None, + asn=None, + as_owner=None, + country=None, + link=None, + ) + + # Optionally schedule or use cache for AbuseIPDB (we only cache; rows use it later) + if options.abuseipdb and not options.no_cti and abip.enabled() and ip not in offline_bad: + ab_cached = get_cached(cache, f"abip:{ip}") if options.use_cache else None + if ab_cached is None: + # do not parallelize here to keep code simple; we rely on its own rate limiter + try: + ab_res = abip.fetch(ip) + if ab_res and options.use_cache: + set_cached(cache, f"abip:{ip}", ab_res.__dict__) + save_counter += 1 + if save_counter >= max(1, options.save_every): + save_cache(cache, DEFAULT_CACHE) + save_counter = 0 + except Exception as e: + errors.append(f"{ip} (abuseipdb): {e}") + except Exception as e: + errors.append(f"{ip}: {e}") + results_by_ip[ip] = VTResult( + ip=ip, + malicious=0, + suspicious=0, + harmless=0, + undetected=0, + last_analysis_date=None, + asn=None, + as_owner=None, + country=None, + link=None, + ) + + # Stage 2: collect scheduled futures + if executor is not None: + for fut in as_completed(futures_map): + ip, vt_res = fut.result() + if vt_res and options.use_cache: + set_cached(cache, f"vt:{ip}", vt_res.__dict__) + save_counter += 1 + if save_counter >= max(1, options.save_every): + save_cache(cache, DEFAULT_CACHE) + save_counter = 0 + results_by_ip[ip] = vt_res or VTResult( + ip=ip, + malicious=0, + suspicious=0, + harmless=0, + undetected=0, + last_analysis_date=None, + asn=None, + as_owner=None, + country=None, + link=None, + ) + if on_progress: + try: 
+ # approximate progress: number collected so far + immediate ones + completed = len(results_by_ip) + on_progress(completed, total) + except Exception: + pass + executor.shutdown(wait=True) + + # Build ordered results list + results = [results_by_ip[ip] for ip in unique_ips] + if on_progress and not futures_map: + # sequential path; update final state + try: + on_progress(total, total) + except Exception: + pass + + if options.use_cache: + save_cache(cache, DEFAULT_CACHE) + + malicious = sum(1 for r in results if r.malicious > 0) + suspicious = sum(1 for r in results if (r.malicious == 0 and r.suspicious > 0)) + harmless = sum(1 for r in results if (r.malicious == 0 and r.suspicious == 0 and r.harmless > 0)) + + summary = { + "total": len(results), + "malicious": malicious, + "suspicious": suspicious, + "harmless": harmless, + "errors": len(errors), + } + return results, summary, errors + + +def scan_ips_enrich( + ips: List[str], + options: ScanOptions, + on_progress: ProgressCb = None, +): + """Scan IPs with VT and AbuseIPDB and return ready-to-report rows. + + Returns (rows, summary, errors). 
+ """ + results, summary, errors = scan_ips_list(ips, options, on_progress) + cache = load_cache(DEFAULT_CACHE) if options.use_cache else {} + + rows = [] + for r in results: + ip = r.ip + ab: Optional[AbuseIPDBResult] = None + ab_raw = get_cached(cache, f"abip:{ip}") if options.use_cache else None + if ab_raw: + try: + ab = AbuseIPDBResult(**ab_raw) + except Exception: + ab = None + is_mal = (r.malicious > 0) or (ab.is_malicious(options.abuseipdb_threshold) if ab else False) + is_susp = (not is_mal and r.suspicious > 0) + label = "malicious" if is_mal else ("suspicious" if is_susp else "clean") + country = (r.country or (ab.country_code if ab else None) or "") + owner = (r.as_owner or (ab.isp if ab else None) or "") + rows.append( + { + "ip": ip, + "classification": label, + "country": country, + "malicious": str(r.malicious), + "suspicious": str(r.suspicious), + "harmless": str(r.harmless), + "as_owner": owner, + } + ) + # Recompute counts from rows for correctness + total = len(rows) + malicious = sum(1 for x in rows if x["classification"] == "malicious") + suspicious = sum(1 for x in rows if x["classification"] == "suspicious") + harmless = total - malicious - suspicious + summary.update({"total": total, "malicious": malicious, "suspicious": suspicious, "harmless": harmless}) + return rows, summary, errors diff --git a/src/cti/__init__.py b/src/cti/__init__.py new file mode 100644 index 0000000..6f43125 --- /dev/null +++ b/src/cti/__init__.py @@ -0,0 +1,11 @@ +"""CTI clients and helpers.""" + +from .virustotal import VirusTotalClient, VTResult +from .abuseipdb import AbuseIPDBClient, AbuseIPDBResult + +__all__ = [ + "VirusTotalClient", + "VTResult", + "AbuseIPDBClient", + "AbuseIPDBResult", +] diff --git a/src/cti/abuseipdb.py b/src/cti/abuseipdb.py new file mode 100644 index 0000000..4c767ba --- /dev/null +++ b/src/cti/abuseipdb.py @@ -0,0 +1,124 @@ +from __future__ import annotations + +import json +import os +import time +from dataclasses import dataclass 
+from typing import Any, Dict, Optional + +import requests + +from .ratelimit import RateLimitConfig, RateLimiter +from src.net.proxy import ProxyRotator + + +@dataclass +class AbuseIPDBResult: + ip: str + abuse_confidence: int + total_reports: int + country_code: Optional[str] + isp: Optional[str] + usage_type: Optional[str] + domain: Optional[str] + last_reported_at: Optional[str] + link: Optional[str] + + def is_malicious(self, threshold: int = 50) -> bool: + try: + return int(self.abuse_confidence) >= int(threshold) + except Exception: + return False + + +class AbuseIPDBClient: + """Minimal AbuseIPDB v2 client for IP check. + + Reads API key from env var ABUSEIPDB_API_KEY. + """ + + BASE = "https://api.abuseipdb.com/api/v2/check" + + def __init__( + self, + api_key: Optional[str] = None, + timeout: float = 15.0, + rate: Optional[RateLimitConfig] = None, + proxies: Optional[ProxyRotator] = None, + ): + # Support single or multiple keys via env + env_multi = os.getenv("ABUSEIPDB_API_KEYS", "").strip() + if env_multi: + self.api_keys = [k.strip() for k in env_multi.split(",") if k.strip()] + else: + single = api_key or os.getenv("ABUSEIPDB_API_KEY") + self.api_keys = [single] if single else [] + self._key_index = 0 + self.timeout = timeout + self.ratelimiter = RateLimiter(rate or RateLimitConfig(per_second=1.0, burst=1)) + self.session = requests.Session() + self.proxies = proxies or ProxyRotator.from_env() + + def enabled(self) -> bool: + return bool(self.api_keys) + + def fetch(self, ip: str) -> Optional[AbuseIPDBResult]: + if not self.enabled(): + return None + params = {"ipAddress": ip, "maxAgeInDays": 365} + # Use current key; rotate between attempts if multiple keys are configured + for attempt in range(4): + try: + self.ratelimiter.acquire() + key = self.api_keys[self._key_index % max(1, len(self.api_keys))] if self.api_keys else None + headers = {"Key": key or "", "Accept": "application/json"} + resp = self.session.get( + self.BASE, + params=params, + 
headers=headers, + timeout=self.timeout, + proxies=(self.proxies.get() if self.proxies.enabled() else None), + ) + if resp.status_code == 200: + return self._parse(resp.json(), ip) + if resp.status_code in (429, 500, 502, 503): + retry_after = resp.headers.get("Retry-After") + sleep_s = float(retry_after) if retry_after and retry_after.isdigit() else 2 ** attempt + time.sleep(sleep_s) + # If multiple keys are configured, rotate to distribute load + if len(self.api_keys) > 1 and resp.status_code == 429: + self._key_index = (self._key_index + 1) % len(self.api_keys) + continue + if resp.status_code == 403: + # Forbidden (possibly IP-level). Rotate proxy if configured; if multiple keys, rotate key as well. + if self.proxies.enabled(): + self.proxies.rotate() + if len(self.api_keys) > 1: + self._key_index = (self._key_index + 1) % len(self.api_keys) + time.sleep(2 ** attempt) + continue + try: + err = resp.json() + except json.JSONDecodeError: + err = {"error": resp.text} + raise RuntimeError(f"AbuseIPDB error {resp.status_code}: {err}") + except requests.RequestException as e: + if attempt == 3: + raise + time.sleep(2 ** attempt) + return None + + @staticmethod + def _parse(data: Dict[str, Any], ip: str) -> AbuseIPDBResult: + d = data.get("data", {}) + return AbuseIPDBResult( + ip=ip, + abuse_confidence=int(d.get("abuseConfidenceScore", 0) or 0), + total_reports=int(d.get("totalReports", 0) or 0), + country_code=d.get("countryCode"), + isp=d.get("isp"), + usage_type=d.get("usageType"), + domain=d.get("domain"), + last_reported_at=d.get("lastReportedAt"), + link=f"https://www.abuseipdb.com/check/{ip}", + ) diff --git a/src/cti/ratelimit.py b/src/cti/ratelimit.py new file mode 100644 index 0000000..5f0ba9e --- /dev/null +++ b/src/cti/ratelimit.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +import threading +import time +from dataclasses import dataclass + + +@dataclass +class RateLimitConfig: + per_second: float = 1.0 # requests per second + burst: int = 1 # 
initial burst tokens + + +class RateLimiter: + """Simple token-bucket rate limiter (thread-safe).""" + + def __init__(self, cfg: RateLimitConfig) -> None: + self.cfg = cfg + self.tokens = float(cfg.burst) + self.last = time.monotonic() + self.lock = threading.Lock() + + def acquire(self) -> None: + with self.lock: + now = time.monotonic() + elapsed = now - self.last + self.last = now + self.tokens = min( + self.cfg.burst, self.tokens + elapsed * self.cfg.per_second + ) + if self.tokens >= 1.0: + self.tokens -= 1.0 + return + # Need to wait + need = 1.0 - self.tokens + delay = need / max(self.cfg.per_second, 1e-9) + time.sleep(delay) + # Recurse once after sleep to consume + self.acquire() + diff --git a/src/cti/virustotal.py b/src/cti/virustotal.py new file mode 100644 index 0000000..36411c6 --- /dev/null +++ b/src/cti/virustotal.py @@ -0,0 +1,138 @@ +from __future__ import annotations + +import json +import os +import time +from dataclasses import dataclass +from typing import Any, Dict, Optional + +import requests + +from .ratelimit import RateLimitConfig, RateLimiter +from src.net.proxy import ProxyRotator + +@dataclass +class VTResult: + ip: str + malicious: int + suspicious: int + harmless: int + undetected: int + last_analysis_date: Optional[int] + asn: Optional[int] + as_owner: Optional[str] + country: Optional[str] + link: Optional[str] + + @property + def is_malicious(self) -> bool: + return self.malicious > 0 + + +class VirusTotalClient: + """Minimal VirusTotal v3 client for IP lookups with simple backoff. + + Reads API key from env var VT_API_KEY. 
def fetch(self, ip: str) -> "Optional[VTResult]":
    """Look up ``ip`` on VirusTotal and return the parsed result.

    Makes up to four attempts. Returns ``None`` when no API key is configured
    or when every attempt ended in a retryable status (429/5xx/403). A 404
    yields a ``VTResult`` with all counters at zero. Any other status raises
    ``RuntimeError``; a ``requests.RequestException`` on the final attempt is
    re-raised.
    """
    if not self.enabled():
        return None
    url = self.BASE + ip
    for attempt in range(4):
        # Resolve the key on every attempt: the 429/403 branches below rotate
        # ``_key_index`` and the next attempt must actually use the new key.
        key = self.api_keys[self._key_index % len(self.api_keys)]
        headers = {"x-apikey": key or ""}
        try:
            self.ratelimiter.acquire()
            proxies = self.proxies.get() if self.proxies.enabled() else None
            resp = self.session.get(
                url, headers=headers, timeout=self.timeout, proxies=proxies
            )
            status = resp.status_code
            if status == 200:
                return self._parse(resp.json(), ip)
            if status == 404:
                # Unknown to VirusTotal: report an empty, non-malicious record.
                return VTResult(
                    ip=ip,
                    malicious=0,
                    suspicious=0,
                    harmless=0,
                    undetected=0,
                    last_analysis_date=None,
                    asn=None,
                    as_owner=None,
                    country=None,
                    link=None,
                )
            if status in (429, 500, 502, 503):
                # Honour a numeric Retry-After; otherwise back off exponentially.
                retry_after = resp.headers.get("Retry-After")
                try:
                    sleep_s = float(retry_after) if retry_after else 2 ** attempt
                except ValueError:
                    sleep_s = 2 ** attempt
                time.sleep(sleep_s)
                if len(self.api_keys) > 1 and status == 429:
                    self._key_index = (self._key_index + 1) % len(self.api_keys)
                continue
            if status == 403:
                # Forbidden (possibly IP-level): rotate proxy and key, then retry.
                if self.proxies.enabled():
                    self.proxies.rotate()
                if len(self.api_keys) > 1:
                    self._key_index = (self._key_index + 1) % len(self.api_keys)
                time.sleep(2 ** attempt)
                continue
            # Any other status is treated as fatal; include the body for context.
            try:
                err = resp.json()
            except json.JSONDecodeError:
                err = {"error": resp.text}
            raise RuntimeError(f"VT error {status}: {err}")
        except requests.RequestException:
            if attempt == 3:
                raise
            time.sleep(2 ** attempt)
    return None
def fetch_virustotal(ip: str, api_key: Optional[str], timeout: float = 15.0) -> VirusTotalResult:
    """Fetch the VirusTotal malicious/suspicious counts for ``ip``.

    Best-effort: when no API key is given, ``httpx`` cannot be imported, the
    response status is >= 400, or anything raises, a result with both counts
    set to ``None`` is returned instead of an error.
    """
    url = f"https://www.virustotal.com/api/v3/ip_addresses/{ip}"

    def _no_data() -> VirusTotalResult:
        return VirusTotalResult(ip=ip, malicious=None, suspicious=None, url=url)

    if not api_key:  # pragma: no cover
        return _no_data()
    try:
        import httpx  # type: ignore
    except Exception:  # pragma: no cover
        return _no_data()
    try:
        with httpx.Client(timeout=timeout, headers={"x-apikey": api_key}) as http:
            response = http.get(url)
            if response.status_code >= 400:
                return _no_data()
            payload = response.json()
            attributes = payload.get("data", {}).get("attributes", {})
            stats = attributes.get("last_analysis_stats", {})
            return VirusTotalResult(
                ip=ip,
                malicious=stats.get("malicious"),
                suspicious=stats.get("suspicious"),
                url=url,
            )
    except Exception:  # pragma: no cover
        return _no_data()
def fetch_otx(ip: str, api_key: Optional[str], timeout: float = 15.0) -> OTXResult:
    """Query the AlienVault OTX "general" indicator endpoint for ``ip``.

    Best-effort: without an API key, without ``httpx``, on a status >= 400, or
    on any exception, the result carries ``None`` for both ``pulse_count`` and
    ``reputation``.
    """
    url = f"https://otx.alienvault.com/api/v1/indicators/IPv4/{ip}/general"

    def _blank() -> OTXResult:
        return OTXResult(ip=ip, pulse_count=None, reputation=None, url=url)

    if not api_key:
        return _blank()
    try:
        import httpx  # type: ignore
    except Exception:  # pragma: no cover
        return _blank()
    try:
        with httpx.Client(timeout=timeout, headers={"X-OTX-API-KEY": api_key}) as http:
            response = http.get(url)
            if response.status_code >= 400:
                return _blank()
            payload = response.json()
            # A missing or null count is reported as zero pulses.
            pulses = int(payload.get("pulse_info", {}).get("count") or 0)
            raw_reputation = payload.get("reputation")
            reputation = None
            if raw_reputation is not None:
                try:
                    reputation = int(raw_reputation)
                except Exception:
                    reputation = None
            return OTXResult(ip=ip, pulse_count=pulses, reputation=reputation, url=url)
    except Exception:  # pragma: no cover
        return _blank()
def fetch_threatfox(ip: str, timeout: float = 15.0) -> ThreatFoxResult:
    """Search ThreatFox for IOCs matching ``ip`` and report the match count.

    Best-effort: if ``httpx`` is unavailable, the status is >= 400, or anything
    raises, ``matches`` is ``None``. A ``data`` field that is not a list counts
    as zero matches.
    """
    url = "https://threatfox-api.abuse.ch/api/v1/"

    def _unknown() -> ThreatFoxResult:
        return ThreatFoxResult(ip=ip, matches=None, url=url)

    try:
        import httpx  # type: ignore
    except Exception:  # pragma: no cover
        return _unknown()
    try:
        with httpx.Client(timeout=timeout) as http:
            response = http.post(url, json={"query": "search_ioc", "search_term": ip})
            if response.status_code >= 400:
                return _unknown()
            # Response has { query_status, data: [ ... ] }
            hits = response.json().get("data") or []
            if isinstance(hits, list):
                match_count = len(hits)
            else:
                match_count = 0
            return ThreatFoxResult(ip=ip, matches=match_count, url=url)
    except Exception:  # pragma: no cover
        return _unknown()
def cti_for_ips(
    ips: Iterable[str],
    providers: Iterable[str] = ("abuseipdb", "talos", "virustotal", "otx", "greynoise", "threatfox", "ipinfo"),
    cache_path: Path | None = Path("data/cache/cti_cache.json"),
    force_refresh: bool = False,
    virustotal_api_key: Optional[str] = None,
    otx_api_key: Optional[str] = None,
    greynoise_api_key: Optional[str] = None,
    ipinfo_token: Optional[str] = None,
    *,
    batch_size: int | None = None,
    pause_seconds: float = 0.0,
    cache_flush_every: int = 10,
) -> Dict[str, CTIRecord]:
    """Collect threat-intel data for each distinct, non-empty IP in ``ips``.

    Each record starts from the cached values (when ``cache_path`` is set) and
    a provider is queried only if it is listed in ``providers`` AND either
    ``force_refresh`` is true or its primary field is still ``None``.

    Args:
        ips: IP strings; falsy entries are dropped, duplicates collapsed in order.
        providers: Provider names to query. A single name passed as a plain
            string is accepted as a one-element collection.
        cache_path: JSON cache file, or ``None`` to disable caching.
        force_refresh: Re-query the selected providers even when cached.
        virustotal_api_key, otx_api_key, greynoise_api_key, ipinfo_token:
            Credentials forwarded to the respective fetchers.
        batch_size: If set, sleep ``pause_seconds`` after every ``batch_size`` IPs.
        pause_seconds: Pause length between batches; ``<= 0`` disables pausing.
        cache_flush_every: Write the cache to disk every N processed IPs
            (values below 1 are treated as 1).

    Returns:
        Mapping of IP to its ``CTIRecord`` with ``risk`` computed.
    """
    # Function-scope import: this file's top-level imports are only partly visible.
    import time as _time

    # Materialise once so one-shot iterables survive repeated membership tests,
    # and so a bare string is not treated as a collection of characters.
    if isinstance(providers, str):
        providers = (providers,)
    enabled = frozenset(providers)

    # Provider-derived fields that are persisted to and restored from the cache.
    cached_fields = (
        "abuse_confidence_score", "total_reports", "country", "url",
        "talos_reputation", "talos_owner", "talos_url",
        "vt_malicious", "vt_suspicious", "vt_url",
        "otx_pulse_count", "otx_reputation", "otx_url",
        "greynoise_classification", "greynoise_name", "greynoise_url",
        "threatfox_matches", "threatfox_url",
        "ipinfo_org", "ipinfo_country", "ipinfo_city", "ipinfo_url",
    )

    def _should_fetch(name: str, current: object) -> bool:
        # The provider filter applies in every case, including force_refresh.
        return name in enabled and (force_refresh or current is None)

    results: Dict[str, CTIRecord] = {}
    unique_ips = list(dict.fromkeys(i for i in ips if i))
    cache: Dict[str, Dict[str, object]] = _load_cache(cache_path) if cache_path else {}
    flush_every = max(1, cache_flush_every)

    for processed, ip in enumerate(unique_ips, start=1):
        cached = cache.get(ip, {}) if cache_path else {}
        rec = CTIRecord(
            ip=ip,
            source="multi",
            **{name: cached.get(name) for name in cached_fields},
        )
        if _should_fetch("abuseipdb", rec.abuse_confidence_score):
            a: AbuseIPDBResult = fetch_abuseipdb(ip)
            rec.abuse_confidence_score = a.abuse_confidence_score
            rec.total_reports = a.total_reports
            rec.country = a.country
            rec.url = a.url
        if _should_fetch("talos", rec.talos_reputation):
            t: TalosResult = fetch_talos(ip)
            rec.talos_reputation = t.reputation
            rec.talos_owner = t.owner
            rec.talos_url = t.url
        if _should_fetch("virustotal", rec.vt_malicious):
            v: VirusTotalResult = fetch_virustotal(ip, virustotal_api_key)
            rec.vt_malicious = v.malicious
            rec.vt_suspicious = v.suspicious
            rec.vt_url = v.url
        if _should_fetch("otx", rec.otx_pulse_count):
            o: OTXResult = fetch_otx(ip, otx_api_key)
            rec.otx_pulse_count = o.pulse_count
            rec.otx_reputation = o.reputation
            rec.otx_url = o.url
        if _should_fetch("greynoise", rec.greynoise_classification):
            g: GreyNoiseResult = fetch_greynoise(ip, greynoise_api_key)
            rec.greynoise_classification = g.classification
            rec.greynoise_name = g.name
            rec.greynoise_url = g.url
        if _should_fetch("threatfox", rec.threatfox_matches):
            tf: ThreatFoxResult = fetch_threatfox(ip)
            rec.threatfox_matches = tf.matches
            rec.threatfox_url = tf.url
        if _should_fetch("ipinfo", rec.ipinfo_org):
            ii: IPInfoResult = fetch_ipinfo(ip, ipinfo_token)
            rec.ipinfo_org = ii.org
            rec.ipinfo_country = ii.country
            rec.ipinfo_city = ii.city
            rec.ipinfo_url = ii.url

        # Risk: AbuseIPDB baseline, then upgraded by the other signals.
        base = _score_to_risk(rec.abuse_confidence_score, rec.total_reports)
        rec.risk = _merge_risk(base, rec.talos_reputation, rec.vt_malicious, rec.vt_suspicious)
        rec.risk = _merge_risk_ext(
            rec.risk, rec.otx_pulse_count, rec.greynoise_classification, rec.threatfox_matches
        )
        results[ip] = rec

        if cache_path:
            cache[ip] = {name: getattr(rec, name) for name in cached_fields}
            # Periodic flush so a crash on a large batch loses little work.
            if processed % flush_every == 0:
                _save_cache(cache_path, cache)
        if batch_size and processed % batch_size == 0 and pause_seconds > 0:
            _time.sleep(pause_seconds)

    if cache_path:
        _save_cache(cache_path, cache)
    return results
def enrich_log_records(
    records: List[Dict[str, object]],
    use_llm: bool = True,
    *,
    llm_sample: Optional[int] = None,
    group_by: Optional[List[str]] = None,
    group_window_sec: Optional[int] = None,
    llm_gate_min_4xx: Optional[int] = None,
    llm_gate_ua: bool = False,
) -> List[Dict[str, object]]:
    """Annotate records with ``severity``, ``iocs`` and ``rationale`` via the LLM.

    - With ``use_llm`` false, every record gets a pass-through annotation and
      no client is created.
    - With ``group_by``, records sharing those field values (and, when
      ``group_window_sec`` is set and the timestamp parses, the same time
      bucket) form a group; only the first record of a group is sent and its
      answer is copied to the whole group. Without ``group_by`` each record is
      its own group.
    - ``llm_gate_min_4xx`` / ``llm_gate_ua`` drop groups that have fewer 4xx
      responses than required / no suspicious user agent.
    - ``llm_sample`` keeps only that many groups, largest first.

    Groups that are not sent, and LLM failures, get ``severity="unknown"``.
    An LLM reply that is not a JSON object is kept as the rationale text
    (first 200 characters) instead of being merged into the record.
    """
    if not use_llm:
        return [
            {**r, "severity": "unknown", "iocs": [r.get("ip")] if r.get("ip") else [], "rationale": "LLM disabled"}
            for r in records
        ]

    gating = llm_gate_min_4xx is not None or llm_gate_ua
    time_formats = ("%d/%b/%Y:%H:%M:%S %z", "%Y-%m-%dT%H:%M:%S%z", "%Y-%m-%dT%H:%M:%S.%f%z")

    def _parse_time_bucket(rec: Dict[str, object]) -> Optional[int]:
        # Bucket index of the record's timestamp, or None if absent/unparseable.
        if not group_window_sec:
            return None
        raw = str(rec.get("time") or rec.get("timestamp") or "").strip()
        if not raw:
            return None
        for fmt in time_formats:
            try:
                dt = datetime.strptime(raw, fmt)
                return int(dt.timestamp()) // int(group_window_sec)
            except Exception:
                continue
        return None

    def _key(rec: Dict[str, object]) -> Tuple[object, ...]:
        if not group_by:
            return (id(rec),)  # unique per record so it behaves like "no grouping"
        bucket = _parse_time_bucket(rec)
        base = tuple(rec.get(k) for k in group_by)
        return base + ((bucket,) if bucket is not None else tuple())

    # Compute each record's key once; it is needed for grouping and for output.
    record_keys = [_key(rec) for rec in records]

    groups: Dict[Tuple[object, ...], List[int]] = {}
    errors_4xx: Dict[Tuple[object, ...], int] = {}
    ua_flagged: Dict[Tuple[object, ...], bool] = {}
    ua_patterns: Optional[List[str]] = None
    ua_patterns_loaded = False
    for idx, (rec, k) in enumerate(zip(records, record_keys)):
        groups.setdefault(k, []).append(idx)
        try:
            status = int(rec.get("status", 0))
        except (TypeError, ValueError):
            status = 0
        if 400 <= status < 500:
            errors_4xx[k] = errors_4xx.get(k, 0) + 1
        # The UA scan only feeds the UA gate: skip it when that gate is off or
        # the group is already flagged.
        if llm_gate_ua and not ua_flagged.get(k, False):
            ua_str = str(rec.get("ua") or rec.get("user_agent") or "")
            if ua_str:
                if not ua_patterns_loaded:
                    # Settings are loop-invariant: load them once, on first need.
                    ua_patterns = get_settings().suspicious_ua_patterns or None
                    ua_patterns_loaded = True
                flagged, _ = detect_suspicious_user_agent(ua_str, patterns=ua_patterns)
                ua_flagged[k] = bool(flagged)

    # Prefer larger groups first to maximize coverage (stable for ties).
    group_keys = sorted(groups, key=lambda k: len(groups[k]), reverse=True)
    if gating:
        min_4xx = int(llm_gate_min_4xx) if llm_gate_min_4xx is not None else None
        group_keys = [
            k
            for k in group_keys
            if (min_4xx is None or errors_4xx.get(k, 0) >= min_4xx)
            and (not llm_gate_ua or ua_flagged.get(k, False))
        ]
    if llm_sample is not None and llm_sample >= 0:
        group_keys = group_keys[:llm_sample]

    client = GroqRotatingClient()
    parsed_by_group: Dict[Tuple[object, ...], Dict[str, object]] = {}
    for k in group_keys:
        representative = records[groups[k][0]]
        user = f"Log: {json.dumps(representative, ensure_ascii=False)}"
        try:
            content = client.chat([
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user},
            ])
        except Exception as e:  # budget/rate/network
            parsed_by_group[k] = {"severity": "unknown", "iocs": [], "rationale": f"LLM unavailable: {str(e)[:120]}"}
            continue
        try:
            parsed = json.loads(content)
        except Exception:
            parsed = None
        # Only a JSON object can be merged into a record; anything else
        # (invalid JSON, list, string, number) is preserved as rationale text.
        if not isinstance(parsed, dict):
            parsed = {"severity": "unknown", "iocs": [], "rationale": str(content)[:200]}
        parsed_by_group[k] = parsed

    fallback_reason = "LLM gated out" if gating else "LLM sampled out"
    enriched: List[Dict[str, object]] = []
    for rec, k in zip(records, record_keys):
        parsed = parsed_by_group.get(k)
        if parsed is None:
            parsed = {
                "severity": "unknown",
                "iocs": [rec.get("ip")] if rec.get("ip") else [],
                "rationale": fallback_reason,
            }
        enriched.append({**rec, **parsed})
    return enriched
b/src/groq_client.py @@ -3,6 +3,7 @@ import random import time from typing import Dict, List, Optional +import os from groq import Groq from rich.console import Console @@ -20,6 +21,12 @@ def __init__(self, api_keys: Optional[List[str]] = None, model: Optional[str] = self.model = model or settings.groq_model self._clients = [Groq(api_key=k) for k in self.keys] self._index = 0 + # Simple token budget guard (approximate tokens via chars/4) + try: + self._budget = int(os.getenv("GROQ_TOKENS_BUDGET", "0")) or None + except ValueError: + self._budget = None + self._used = 0 def _next_client(self) -> Groq: if not self._clients: @@ -39,6 +46,11 @@ def chat(self, messages: List[Dict[str, str]], """ last_error: Optional[Exception] = None m = model or self.model + # pre-check budget + if self._budget is not None: + approx_tokens = sum(len(m.get("content", "")) for m in messages) // 4 + 32 + if self._used + approx_tokens > self._budget: + raise RuntimeError("LLM budget exhausted; set GROQ_TOKENS_BUDGET higher or reduce --llm-sample") for attempt in range(max_retries): client = self._next_client() try: @@ -47,6 +59,24 @@ def chat(self, messages: List[Dict[str, str]], messages=messages, temperature=0.2, ) + # book tokens used: prefer API-reported usage when available, else approx + if self._budget is not None: + used_tokens = None + try: + usage = getattr(resp, "usage", None) + if usage is not None: + # Groq/OpenAI-style usage fields + if hasattr(usage, "total_tokens"): + used_tokens = int(getattr(usage, "total_tokens")) + elif hasattr(usage, "prompt_tokens") or hasattr(usage, "completion_tokens"): + pt = int(getattr(usage, "prompt_tokens", 0) or 0) + ct = int(getattr(usage, "completion_tokens", 0) or 0) + used_tokens = pt + ct + except Exception: + used_tokens = None + if used_tokens is None: + used_tokens = sum(len(m.get("content", "")) for m in messages) // 4 + 32 + self._used += int(used_tokens) return resp.choices[0].message.content or "" except Exception as e: # pragma: 
def _normalize_proxy_url(s: str) -> Optional[str]:
    """Return ``s`` stripped, prefixed with ``http://`` if it has no scheme.

    Blank input yields ``None``.
    """
    candidate = s.strip()
    if not candidate:
        return None
    # Default to http if scheme omitted
    return candidate if "://" in candidate else f"http://{candidate}"


@dataclass
class ProxyConfig:
    """Ordered list of outbound proxy URLs."""

    urls: List[str]

    @classmethod
    def from_env(cls) -> "ProxyConfig":
        """Build the configuration from environment variables.

        ``PROXY_LIST`` (comma-separated) takes precedence. If it yields no
        URL, a single proxy is taken from ``HTTPS_PROXY``/``https_proxy`` or,
        failing that, ``HTTP_PROXY``/``http_proxy``.
        """
        listed = os.getenv("PROXY_LIST", "").strip()
        normalized = (_normalize_proxy_url(piece) for piece in listed.split(","))
        urls = [url for url in normalized if url]
        if urls:
            return cls(urls=urls)

        # Fallback to traditional env if set (single proxy)
        http = os.getenv("HTTP_PROXY") or os.getenv("http_proxy")
        https = os.getenv("HTTPS_PROXY") or os.getenv("https_proxy")
        # Use https if provided, else http
        preferred = https or http
        if preferred:
            single = _normalize_proxy_url(preferred)
            if single:
                urls = [single]
        return cls(urls=urls)
def detect_suspicious_user_agent(ua: Optional[str], patterns: Optional[List[str]] = None) -> Tuple[bool, Optional[str]]:
    """Check a User-Agent string against a list of regex patterns.

    Args:
        ua: The User-Agent header value; ``None`` or empty is never suspicious.
        patterns: Regex patterns to try in order. When ``None`` or empty, the
            module-level ``SUSPICIOUS_AGENTS`` list is used.

    Returns:
        ``(True, pattern)`` for the first pattern that matches anywhere in
        ``ua``, else ``(False, None)``.
    """
    if not ua:
        return False, None
    candidates = patterns if patterns else SUSPICIOUS_AGENTS
    for pat in candidates:
        # Match case-insensitively on the original string. Lower-casing only
        # the UA would make any pattern containing upper-case letters (e.g. a
        # user-configured "SQLMap") impossible to match.
        if re.search(pat, ua, re.IGNORECASE):
            return True, pat
    return False, None
def _bar_chart(self, total: int, malicious: int, suspicious: int, harmless: int) -> None:
    """Draw a single stacked bar showing the malicious/suspicious/remaining split.

    Nothing is drawn when ``total`` is not positive. The green segment is the
    width left over after the red and orange segments (never negative); the
    ``harmless`` argument itself is not used for sizing.
    """
    if total <= 0:
        return
    pdf = self.pdf
    pdf.ln(2)
    pdf.set_font("Helvetica", "B", 11)
    pdf.cell(0, 7, "Overview", ln=True)

    left = pdf.get_x()
    top = pdf.get_y()
    bar_width = 180
    bar_height = 8

    # Compute proportional widths
    red_w = bar_width * (malicious / total)
    orange_w = bar_width * (suspicious / total)
    green_w = max(0.0, bar_width - red_w - orange_w)

    segments = (
        ((220, 53, 69), red_w),      # malicious - red
        ((255, 159, 67), orange_w),  # suspicious - orange
        ((40, 167, 69), green_w),    # harmless - green
    )
    for rgb, seg_width in segments:
        pdf.set_fill_color(*rgb)
        pdf.rect(left, top, seg_width, bar_height, style="F")
        left = left + seg_width

    pdf.ln(bar_height + 2)
    pdf.set_font("Helvetica", size=10)
    pdf.cell(0, 6, "Legend: red=malicious, orange=suspicious, green=harmless", ln=True)
def _table_rows(self, rows: Iterable[Mapping[str, str]]) -> None:
    """Render one bordered, colour-filled table line per row.

    Columns are IP, classification, country, malicious, suspicious, harmless
    and AS owner (truncated to 45 characters). The row tint follows the
    classification. For a non-empty country code a flag image is placed in
    the cell (a placeholder file is created first if none exists); if the
    image cannot be drawn only the code is printed.
    """
    pdf = self.pdf
    pdf.set_font("Helvetica", size=10)
    widths = [40, 28, 28, 18, 18, 18, 45]
    country_col = 2

    for row in rows:
        values = [
            row.get("ip", ""),
            row.get("classification", ""),
            row.get("country", ""),
            str(row.get("malicious", "")),
            str(row.get("suspicious", "")),
            str(row.get("harmless", "")),
            (row.get("as_owner", "") or "")[:45],
        ]

        # Row color based on classification
        label = (row.get("classification") or "").lower()
        if label == "malicious":
            tint = (255, 235, 238)  # light red
        elif label == "suspicious":
            tint = (255, 248, 225)  # light orange
        else:
            tint = (245, 255, 245)  # very light green
        pdf.set_fill_color(*tint)

        for col, (width, value) in enumerate(zip(widths, values)):
            if col != country_col:
                pdf.cell(width, 7, self._sanitize(str(value)), border=1, fill=True)
                continue

            cell_x = pdf.get_x()
            cell_y = pdf.get_y()
            code = str(value or "").upper()
            flag_path = Path("data/assets/flags") / f"{code}.png"
            label_text = code
            if code:
                have_flag = flag_path.exists()
                try:
                    if not have_flag:
                        # Create a tiny placeholder flag on-the-fly so PDFs always render
                        self._ensure_flag_placeholder(flag_path)
                        have_flag = flag_path.exists()
                    if have_flag:
                        pdf.image(str(flag_path), x=cell_x + 1, y=cell_y + 1, w=5, h=5)
                        # Leading spaces leave room for the image inside the cell.
                        label_text = f"  {code}"
                except Exception:
                    label_text = code
            pdf.cell(width, 7, self._sanitize(label_text), border=1, fill=True)
        pdf.ln(7)
+ """ + try: + path.parent.mkdir(parents=True, exist_ok=True) + if path.exists(): + return + # 1x1 transparent PNG + b64 = ( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAusB9Yb4UvoAAAAASUVORK5CYII=" + ) + data = base64.b64decode(b64) + with path.open("wb") as f: + f.write(data) + except Exception: + # best-effort; ignore failures + pass + + def build(self, malicious_rows: List[Mapping[str, str]], summary: Mapping[str, int], ai_summary: str | None = None) -> bytes: + self.pdf.add_page() + self._header() + # Optional AI executive summary + if ai_summary: + self.pdf.ln(3) + self.pdf.set_font("Helvetica", "B", 12) + self.pdf.cell(0, 8, "AI Executive Summary", ln=True) + self.pdf.set_font("Helvetica", size=11) + # Render in wrapped cells + for paragraph in ai_summary.strip().split("\n"): + if paragraph.strip(): + self.pdf.multi_cell(0, 6, self._sanitize(paragraph.strip())) + self.pdf.ln(2) + self._summary( + total=summary.get("total", 0), + malicious=summary.get("malicious", 0), + suspicious=summary.get("suspicious", 0), + ) + self._bar_chart( + total=summary.get("total", 0), + malicious=summary.get("malicious", 0), + suspicious=summary.get("suspicious", 0), + harmless=summary.get("harmless", 0), + ) + self.pdf.ln(4) + self.pdf.set_font("Helvetica", "B", 12) + self.pdf.cell(0, 8, "Malicious IPs", ln=True) + headers = ["IP", "Class", "Country", "Mal", "Susp", "Harmless", "AS Owner"] + self._table_header(headers) + self._table_rows(malicious_rows) + # Second page: Top countries + counts: Dict[str, int] = {} + for r in malicious_rows: + c = (r.get("country") or "").upper() + if not c: + continue + counts[c] = counts.get(c, 0) + 1 + if counts: + self.pdf.add_page() + self.pdf.set_font("Helvetica", "B", 12) + self.pdf.cell(0, 8, "Top Countries (by malicious count)", ln=True) + self.pdf.set_font("Helvetica", size=11) + for country, cnt in sorted(counts.items(), key=lambda x: x[1], reverse=True)[:15]: + self.pdf.cell(0, 6, f"{country}: {cnt}", ln=True) + 
data = self.pdf.output(dest="S") + try: + return bytes(data) + except Exception: + # Fallback: older FPDF may return str + return str(data).encode("latin-1", errors="replace") diff --git a/src/reports/report_builder.py b/src/reports/report_builder.py index bc5f85d..06bb121 100644 --- a/src/reports/report_builder.py +++ b/src/reports/report_builder.py @@ -32,8 +32,8 @@ def build_markdown_report( if not suspicious: lines.append("No suspicious IPs identified.\n") else: - lines.append(_md_row(["IP", "Risk", "Abuse Score", "Total Reports", "Country", "Requests", "4xx", "Suspicious UA", "One-line Explain"])) - lines.append(_md_row(["---"] * 9)) + lines.append(_md_row(["IP", "Risk", "Abuse Score", "Total Reports", "Country", "Requests", "4xx", "Suspicious UA", "Talos", "VT (mal/susp)", "One-line Explain"])) + lines.append(_md_row(["---"] * 11)) for s in suspicious: lines.append( _md_row([ @@ -45,6 +45,8 @@ def build_markdown_report( str(s.get("requests", "")), str(s.get("errors_4xx", "")), "yes" if s.get("ua_suspicious") else "no", + str(s.get("talos_reputation", "")), + f"{s.get('vt_malicious','')}/{s.get('vt_suspicious','')}", str(s.get("ai_one_liner", "")), ]) ) @@ -80,7 +82,8 @@ def build_text_report( lines.append( f"- {s.get('ip')} | risk={s.get('risk')} | score={s.get('abuse_confidence_score')} | " f"reports={s.get('total_reports')} | country={s.get('country')} | req={s.get('requests')} | " - f"4xx={s.get('errors_4xx')} | UA suspicious={'yes' if s.get('ua_suspicious') else 'no'}\n" + f"4xx={s.get('errors_4xx')} | UA suspicious={'yes' if s.get('ua_suspicious') else 'no'} | " + f"talos={s.get('talos_reputation')} | vt={s.get('vt_malicious')}/{s.get('vt_suspicious')}\n" ) if s.get("ai_one_liner"): lines.append(f" AI: {s.get('ai_one_liner')}\n") @@ -88,3 +91,32 @@ def build_text_report( path.write_text("".join(lines), encoding="utf-8") return path + +def build_malicious_ai_report( + out_dir: Path, + content: str, + *, + title: str = "Malicious Activity AI Report", +) -> 
tuple[Path, Path]: + """Write a detailed AI-written malicious activity report to txt and md. + + Returns: (txt_path, md_path) + """ + out_dir.mkdir(parents=True, exist_ok=True) + txt_path = out_dir / "malicious_ai_report.txt" + md_path = out_dir / "malicious_ai_report.md" + + # Text version + lines_txt: List[str] = [] + lines_txt.append(f"{title}\n") + lines_txt.append("=" * len(title) + "\n\n") + lines_txt.append(content.strip() + "\n") + txt_path.write_text("".join(lines_txt), encoding="utf-8") + + # Markdown version + lines_md: List[str] = [] + lines_md.append(f"# {title}\n\n") + lines_md.append(content.strip() + "\n") + md_path.write_text("".join(lines_md), encoding="utf-8") + + return txt_path, md_path diff --git a/src/ui/streamlit_app.py b/src/ui/streamlit_app.py new file mode 100644 index 0000000..321c15f --- /dev/null +++ b/src/ui/streamlit_app.py @@ -0,0 +1,162 @@ +from __future__ import annotations + +import io +import os +from pathlib import Path +from typing import List + +import pandas as pd +import streamlit as st + +from src.core.scanner import ScanOptions, parse_ips, scan_ips_enrich +from src.report.pdf_report import PDFReport +from src.groq_client import GroqRotatingClient + + +def country_flag(code: str | None) -> str: + if not code: + return "" + code = code.upper() + if len(code) != 2 or not code.isalpha(): + return code + # Regional indicator symbols start at 0x1F1E6 for 'A' + try: + return chr(0x1F1E6 + ord(code[0]) - ord('A')) + chr(0x1F1E6 + ord(code[1]) - ord('A')) + except Exception: + return code + + +st.set_page_config(page_title="LogCTIAI – IP Scanner", layout="wide") +st.title("🔎 IP Scanner – CTI Enriched Report") +st.write("Upload IP list, enrich with CTI, and download a clean PDF report.") + +with st.sidebar: + st.header("Settings") + vt_key = st.text_input("VirusTotal API Key", type="password", help="Used only for this session") + abip_key = st.text_input("AbuseIPDB API Key", type="password", help="Optional; improves detection") + 
include_susp = st.checkbox("Include suspicious in report", value=False) + use_ai = st.checkbox("AI executive summary in PDF", value=False, help="Uses GROQ keys from .env if configured") + +uploaded = st.file_uploader("Upload .txt with one IP per line", type=["txt"]) +text_ips = st.text_area("…or paste IPs (one per line)") + +def gather_ips() -> List[str]: + lines: List[str] = [] + if uploaded is not None: + content = uploaded.read().decode("utf-8", errors="ignore") + lines.extend(content.splitlines()) + if text_ips.strip(): + lines.extend(text_ips.splitlines()) + return parse_ips(lines) + + +col_run, col_info = st.columns([1, 3]) +with col_run: + run = st.button("Run Scan", type="primary") +with col_info: + st.info("Tip: VT/AbuseIPDB keys improve accuracy. AI summary is optional.") + +if run: + ips = gather_ips() + if not ips: + st.warning("No valid IPs provided.") + st.stop() + + if vt_key: + os.environ["VT_API_KEY"] = vt_key + if abip_key: + os.environ["ABUSEIPDB_API_KEY"] = abip_key + + # Keep options simple and sensible by default + opts = ScanOptions(cti_max=200, use_cache=True, no_cti=False) + + progress = st.progress(0) + status = st.empty() + + def on_prog(i: int, t: int) -> None: + progress.progress(min(1.0, i / max(1, t))) + status.write(f"Scanning {i}/{t}") + + rows, summary, errors = scan_ips_enrich(ips, opts, on_progress=on_prog) + + st.success(f"Scanned {summary['total']} IPs • Malicious: {summary['malicious']} • Suspicious: {summary['suspicious']}") + if errors: + with st.expander("Show errors"): + st.write("\n".join(errors)) + + # Filter rows + rows = [r for r in rows if r["classification"] in ("malicious", "suspicious" if include_susp else "malicious")] + + if rows: + df = pd.DataFrame([{**r, "flag": country_flag(r.get("country"))} for r in rows]) + agg = pd.DataFrame( + { + "class": ["malicious", "suspicious", "harmless"], + "count": [summary["malicious"], summary["suspicious"], summary["harmless"]], + } + ) + + a, b = st.columns([1, 2]) + with a: 
+ st.subheader("Summary") + st.metric("IPs", summary["total"]) + st.metric("Malicious", summary["malicious"]) + st.metric("Suspicious", summary["suspicious"]) + with b: + st.subheader("Distribution") + st.bar_chart(agg.set_index("class")) + + st.subheader("Findings") + st.dataframe(df[["flag", "ip", "classification", "country", "malicious", "suspicious", "harmless", "as_owner"]], use_container_width=True) + + # Optional AI executive summary via GROQ + ai_summary: str | None = None + if use_ai: + with st.spinner("Generating AI executive summary…"): + try: + client = GroqRotatingClient() + # Keep prompt compact: provide summary and up to 20 top rows + sample_rows = [ + {k: r[k] for k in ("ip", "classification", "country", "malicious", "suspicious", "harmless", "as_owner")} + for r in rows[:20] + ] + user = ( + "Write a concise 80-120 word executive summary for a security PDF report. " + "Highlight overall risk level, notable patterns (countries/ASNs), and clear next steps.\n" + f"SUMMARY: {summary}\nROWS: {sample_rows}" + ) + ai_summary = client.chat([ + {"role": "system", "content": "You are a senior SOC analyst writing executive summaries for CISOs."}, + {"role": "user", "content": user}, + ]) + except Exception as e: + st.info(f"AI summary unavailable: {e}") + + # Generate PDF + pdf = PDFReport() + blob = pdf.build( + malicious_rows=[ + { + "ip": r["ip"], + "classification": r["classification"], + "country": r["country"], + "malicious": str(r["malicious"]), + "suspicious": str(r["suspicious"]), + "harmless": str(r["harmless"]), + "as_owner": r["as_owner"], + } + for r in rows + ], + summary=summary, + ai_summary=ai_summary, + ) + + st.download_button("Download PDF report", data=blob, file_name="ip_threat_report.pdf", mime="application/pdf") + + # Save to default processed directory + out_path = Path("data/processed") + out_path.mkdir(parents=True, exist_ok=True) + with (out_path / "ip_threat_report.pdf").open("wb") as f: + f.write(blob) + else: + st.info("No 
malicious findings. Use 'Include suspicious' to broaden the view.") diff --git a/tests/cli/test_ai_malicious_report.py b/tests/cli/test_ai_malicious_report.py new file mode 100644 index 0000000..1f7e09a --- /dev/null +++ b/tests/cli/test_ai_malicious_report.py @@ -0,0 +1,41 @@ +import os +from pathlib import Path + +from src import cli + + +class DummyGroq: + def chat(self, messages): + return "DUMMY MALICIOUS REPORT" + + +def test_ai_malicious_report_offline_blocklist(tmp_path, monkeypatch): + # Prepare a log with one IP that will be escalated via offline blocklist + log = tmp_path / "access_log.txt" + log.write_text( + '\n'.join([ + '10.9.9.9 - - [10/Oct/2000:13:55:36 -0700] "GET /a HTTP/1.1" 404 0 "-" "sqlmap/1.7"', + '10.9.9.9 - - [10/Oct/2000:13:55:40 -0700] "GET /b HTTP/1.1" 404 0 "-" "sqlmap/1.7"', + ]), + encoding="utf-8", + ) + # Create offline blocklist and point env var to it so risk escalates to high + bl = tmp_path / "blocklist.txt" + bl.write_text("10.9.9.9\n", encoding="utf-8") + monkeypatch.setenv("OFFLINE_IP_BLOCKLIST", str(bl)) + # Ensure LLM path is taken; set a dummy key and monkeypatch client + monkeypatch.setenv("GROQ_API_KEYS", "dummy-key") + monkeypatch.setattr(cli, "GroqRotatingClient", lambda: DummyGroq()) + + rc = cli.main([ + str(log), + "--out", str(tmp_path), + "--no-cti", # avoid live CTI calls + "--ai-malicious-report", + "--color", "never", + ]) + assert rc == 0 + rpt = tmp_path / "reports" / "malicious_ai_report.txt" + assert rpt.exists() + assert "DUMMY MALICIOUS REPORT" in rpt.read_text(encoding="utf-8") + diff --git a/tests/cli/test_cli_main.py b/tests/cli/test_cli_main.py index 7cc7c56..f5f7bb5 100644 --- a/tests/cli/test_cli_main.py +++ b/tests/cli/test_cli_main.py @@ -28,3 +28,18 @@ def test_cli_main_unsupported(tmp_path: Path): p.write_bytes(b"\x00\x01") rc = cli.main([str(p), "--out", str(tmp_path), "--no-llm", "--no-cti", "--no-reports"]) assert rc == 2 + + +def test_cli_main_txt_log_autodetect(tmp_path: Path): + txt_log = 
tmp_path / "new_log.txt" + txt_log.write_text('127.0.0.1 - - [10/Oct/2000:13:55:36 -0700] "GET /a HTTP/1.1" 200 123 "-" "Mozilla/5.0"\n', encoding="utf-8") + rc = cli.main([ + str(txt_log), + "--out", str(tmp_path), + "--no-llm", "--no-cti", "--no-reports", + "--color", "never", + ]) + assert rc == 0 + # Confirm log output exists + out_jsonl = tmp_path / "new_log.jsonl" + assert out_jsonl.exists() diff --git a/tests/core/test_parse_and_scan_offline.py b/tests/core/test_parse_and_scan_offline.py new file mode 100644 index 0000000..238d378 --- /dev/null +++ b/tests/core/test_parse_and_scan_offline.py @@ -0,0 +1,19 @@ +from pathlib import Path + +from src.core.scanner import ScanOptions, parse_ips, scan_ips_list + + +def test_parse_ips_filters_invalid(): + ips = parse_ips(["8.8.8.8", "not-an-ip", "1.1.1.1", "#comment", " "]) + assert ips == ["8.8.8.8", "1.1.1.1"] + + +def test_scan_offline_counts(tmp_path: Path): + ips = ["8.8.8.8", "1.1.1.1", "8.8.8.8"] + opts = ScanOptions(cti_max=-1, no_cti=True) + results, summary, errors = scan_ips_list(ips, opts) + assert summary["total"] == 2 + assert summary["malicious"] == 0 + assert summary["suspicious"] == 0 + assert isinstance(errors, list) + diff --git a/tests/enrichers/test_cti_providers_parsing.py b/tests/enrichers/test_cti_providers_parsing.py new file mode 100644 index 0000000..027fafd --- /dev/null +++ b/tests/enrichers/test_cti_providers_parsing.py @@ -0,0 +1,98 @@ +import types + +import pytest + +from src.enrichers.cti_providers import ( + fetch_abuseipdb, + fetch_talos, + fetch_virustotal, +) + + +class _Resp: + def __init__(self, text: str = "", status: int = 200, json_data=None): + self.text = text + self.status_code = status + self._json = json_data or {} + + def raise_for_status(self): + if self.status_code >= 400: + raise RuntimeError("http error") + + def json(self): + return self._json + + +class _Client: + def __init__(self, *, text: str = "", status: int = 200, json_data=None, **_: object): + self._resp 
= _Resp(text=text, status=status, json_data=json_data) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def get(self, url: str): # noqa: ARG002 - exercised by provider code + return self._resp + + +def test_fetch_abuseipdb_parses_html(monkeypatch): + html = """ + +
Abuse Confidence Score: 90
+
Total Reports: 123
+
Country: United States
+ + """ + + # Patch httpx.Client to our stub + import httpx # type: ignore + + monkeypatch.setattr(httpx, "Client", lambda **kwargs: _Client(text=html)) + + res = fetch_abuseipdb("1.2.3.4") + assert res.ip == "1.2.3.4" + assert res.abuse_confidence_score == 90 + assert res.total_reports == 123 + assert res.country == "United States" + assert "abuseipdb" in res.url + + +def test_fetch_talos_parses_html(monkeypatch): + html = """ + +
Web Reputation: Malicious
+
Owner: Example ISP, Inc.
+ + """ + import httpx # type: ignore + + monkeypatch.setattr(httpx, "Client", lambda **kwargs: _Client(text=html)) + + res = fetch_talos("5.6.7.8") + assert res.ip == "5.6.7.8" + assert res.reputation == "Malicious" + assert res.owner == "Example ISP, Inc." + assert "talos" in res.url + + +def test_fetch_virustotal_parses_json(monkeypatch): + payload = { + "data": { + "attributes": { + "last_analysis_stats": {"malicious": 2, "suspicious": 3} + } + } + } + + import httpx # type: ignore + + monkeypatch.setattr(httpx, "Client", lambda **kwargs: _Client(json_data=payload)) + + res = fetch_virustotal("9.9.9.9", api_key="dummy") + assert res.ip == "9.9.9.9" + assert res.malicious == 2 + assert res.suspicious == 3 + assert "virustotal" in res.url + diff --git a/tests/groq_client/test_groq_rotating_client.py b/tests/groq_client/test_groq_rotating_client.py new file mode 100644 index 0000000..70a5819 --- /dev/null +++ b/tests/groq_client/test_groq_rotating_client.py @@ -0,0 +1,49 @@ +from types import SimpleNamespace + +import pytest + +import src.groq_client as gc + + +class FakeCompletions: + def __init__(self, content: str): + self._content = content + + def create(self, model, messages, temperature): # noqa: ARG002 + return SimpleNamespace(choices=[SimpleNamespace(message=SimpleNamespace(content=self._content))]) + + +class FakeChat: + def __init__(self, content: str): + self.completions = FakeCompletions(content) + + +class FakeGroq: + def __init__(self, api_key: str): + # Expose api_key for assertions via _next_client + self.api_key = api_key + self.chat = FakeChat("ok") + + +def test_next_client_rotates(monkeypatch): + # Patch Groq class to our fake + monkeypatch.setattr(gc, "Groq", FakeGroq) + client = gc.GroqRotatingClient(api_keys=["k1", "k2"], model="m") + + c1 = client._next_client() + c2 = client._next_client() + c3 = client._next_client() + assert getattr(c1, "api_key", None) == "k1" + assert getattr(c2, "api_key", None) == "k2" + assert getattr(c3, 
"api_key", None) == "k1" + + +def test_chat_success_path(monkeypatch): + monkeypatch.setattr(gc, "Groq", FakeGroq) + client = gc.GroqRotatingClient(api_keys=["kX"], model="m") + out = client.chat([ + {"role": "system", "content": "s"}, + {"role": "user", "content": "u"}, + ]) + assert out == "ok" + diff --git a/tests/parsers/test_text_extractor_pdf.py b/tests/parsers/test_text_extractor_pdf.py new file mode 100644 index 0000000..0abe108 --- /dev/null +++ b/tests/parsers/test_text_extractor_pdf.py @@ -0,0 +1,19 @@ +from pathlib import Path + +from PyPDF2 import PdfWriter + +from src.parsers.text_extractor import extract_text_from_pdf + + +def test_extract_text_from_pdf_blank(tmp_path: Path): + pdf_path = tmp_path / "blank.pdf" + writer = PdfWriter() + writer.add_blank_page(width=72, height=72) + with pdf_path.open("wb") as f: + writer.write(f) + + text = extract_text_from_pdf(pdf_path) + # Blank page yields empty string but exercises the code path + assert isinstance(text, str) + assert text == "" + diff --git a/tests/report/test_pdf_build.py b/tests/report/test_pdf_build.py new file mode 100644 index 0000000..c075d06 --- /dev/null +++ b/tests/report/test_pdf_build.py @@ -0,0 +1,20 @@ +from src.report.pdf_report import PDFReport + + +def test_pdf_build_nonempty(): + pdf = PDFReport() + rows = [ + { + "ip": "8.8.8.8", + "classification": "malicious", + "country": "US", + "malicious": "5", + "suspicious": "0", + "harmless": "0", + "as_owner": "GOOGLE", + } + ] + blob = pdf.build(rows, {"total": 1, "malicious": 1, "suspicious": 0, "harmless": 0}) + assert isinstance(blob, (bytes, bytearray)) + assert len(blob) > 1000 + diff --git a/ui/app.py b/ui/app.py new file mode 100644 index 0000000..97748fb --- /dev/null +++ b/ui/app.py @@ -0,0 +1,266 @@ +import json +import os +import subprocess +from pathlib import Path + +import pandas as pd +import streamlit as st +from streamlit_autorefresh import st_autorefresh +from dotenv import dotenv_values, set_key + + 
+st.set_page_config(page_title="LogCTI Dashboard", page_icon="🛡️", layout="wide") + +# Header with branding and GitHub link +hdr_l, hdr_c, hdr_r = st.columns([3, 2, 2]) +with hdr_l: + st.markdown(""" +
+ PierringShot Electronics + × + Azerbaijan Cybersecurity Center +
Log + CTI Interactive Dashboard
+
+ """, unsafe_allow_html=True) +with hdr_r: + repo_url = "https://github.com/Azerbaijan-Cybersecurity-Center/LogCTIAI" + if hasattr(st, "link_button"): + st.link_button("GitHub Repo ⭐", repo_url, type="primary") + else: # fallback + st.markdown(f"[![GitHub](https://img.shields.io/badge/GitHub-Repo-black?logo=github)]({repo_url})", unsafe_allow_html=True) + + +@st.cache_data(show_spinner=False) +def load_jsonl(path: Path) -> pd.DataFrame: + rows = [] + for line in path.read_text(encoding="utf-8").splitlines(): + try: + rows.append(json.loads(line)) + except Exception: + continue + return pd.DataFrame(rows) + + +def tail_jsonl(path: Path, start_pos: int = 0, max_lines: int = 2000) -> tuple[list[dict], int]: + rows: list[dict] = [] + try: + with path.open("rb") as f: + f.seek(start_pos) + for i, line in enumerate(f): + if i > max_lines: + break + try: + rows.append(json.loads(line.decode("utf-8", errors="ignore"))) + except Exception: + continue + pos = f.tell() + except FileNotFoundError: + return [], 0 + return rows, pos + + +def list_processed_files(base: Path) -> list[Path]: + if not base.exists(): + return [] + return sorted([p for p in base.glob("*.jsonl")], key=lambda p: p.stat().st_mtime, reverse=True) + + +def severity_badge(row: dict) -> str: + sev = str(row.get("severity") or row.get("risk") or "unknown").lower() + if sev in ("high", "malicious"): + return "🔴 High" + if sev == "medium": + return "🟠 Medium" + if sev == "low": + return "🟡 Low" + return "⚪ Unknown" + + +def run_cli_stream(args: list[str], workdir: Path | None = None): + proc = subprocess.Popen( + args, + cwd=str(workdir or Path.cwd()), + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + bufsize=1, + universal_newlines=True, + ) + assert proc.stdout is not None + for line in proc.stdout: + yield line.rstrip("\n") + proc.wait() + return proc.returncode + + +col1, col2 = st.columns([2, 1]) +with col2: + base_dir = st.text_input("Processed dir", value=str(Path("data/processed").resolve())) + 
base_path = Path(base_dir) + files = list_processed_files(base_path) + file_names = [f.name for f in files] + selected = st.selectbox("Enriched file", options=file_names) if files else None + uploaded = st.file_uploader("...or upload enriched .jsonl", type=["jsonl"]) # optional + refresh_ms = st.slider("Auto-refresh (ms)", min_value=0, max_value=10000, step=500, value=2000, + help="Set to 0 to disable auto-refresh") + if refresh_ms > 0: + st_autorefresh(interval=refresh_ms, key="auto_refresh") + +with st.sidebar: + st.header("Run Pipeline") + inp = st.text_input("Input file path", value=str(Path("data/raw/access_log.txt").resolve())) + out_dir = st.text_input("Output dir", value=str(Path("data/processed").resolve())) + colv1, colv2 = st.columns(2) + with colv1: + verbose = st.selectbox("Verbose", options=["quiet", "normal", "max"], index=2) + with colv2: + color = st.selectbox("Color", options=["auto", "always", "never"], index=0) + no_llm = st.checkbox("Disable LLM", value=False) + limit = st.number_input("Limit records", min_value=0, value=0, step=100) + summary = st.checkbox("Print summary", value=True) + preview = st.number_input("Preview N records", min_value=0, value=10, step=10) + fmt = st.selectbox("Output format", options=["jsonl", "csv"], index=0) + st.divider() + st.caption("LLM controls") + llm_sample = st.number_input("LLM sample groups (0=all)", min_value=0, value=200) + llm_group_by = st.selectbox("Group by", options=["ip", "signature", "none"], index=0) + group_window = st.number_input("Group window (sec)", min_value=0, value=0) + gate_4xx = st.number_input("LLM gate 4xx >=", min_value=0, value=0) + gate_ua = st.checkbox("LLM gate suspicious UA", value=False) + st.divider() + st.caption("CTI controls") + cti_scope = st.selectbox("CTI scope", options=["suspicious", "all"], index=0) + cti_max = st.number_input("CTI max lookups (0=unlimited)", min_value=0, value=100) + cti_batch_size = st.number_input("CTI batch size (0=off)", min_value=0, value=0) + 
cti_batch_pause = st.number_input("CTI batch pause (sec)", min_value=0.0, value=0.0, step=0.1) + ai_mal = st.checkbox("AI malicious report", value=False) + run_btn = st.button("Run ▶", type="primary", use_container_width=True) + + st.header("Edit .env") + env_path = Path(".env") + current_env = dotenv_values(env_path) if env_path.exists() else {} + groq_keys = st.text_area("GROQ_API_KEYS (comma-separated)", value=current_env.get("GROQ_API_KEYS", "")) + groq_model = st.text_input("GROQ_MODEL", value=current_env.get("GROQ_MODEL", "llama3-8b-8192")) + risk_4xx = st.text_input("RISK_4XX_THRESHOLD", value=current_env.get("RISK_4XX_THRESHOLD", "5")) + ua_regex = st.text_input("SUSPICIOUS_UA_REGEX", value=current_env.get("SUSPICIOUS_UA_REGEX", "")) + vt_key = st.text_input("VT_API_KEY", value=current_env.get("VT_API_KEY", "")) + otx_key = st.text_input("OTX_API_KEY", value=current_env.get("OTX_API_KEY", "")) + gn_key = st.text_input("GREYNOISE_API_KEY", value=current_env.get("GREYNOISE_API_KEY", "")) + ipinfo = st.text_input("IPINFO_TOKEN", value=current_env.get("IPINFO_TOKEN", "")) + off_block = st.text_input("OFFLINE_IP_BLOCKLIST", value=current_env.get("OFFLINE_IP_BLOCKLIST", "")) + if st.button("Save .env", use_container_width=True): + env_path.touch(exist_ok=True) + set_key(str(env_path), "GROQ_API_KEYS", groq_keys) + set_key(str(env_path), "GROQ_MODEL", groq_model) + set_key(str(env_path), "RISK_4XX_THRESHOLD", risk_4xx) + set_key(str(env_path), "SUSPICIOUS_UA_REGEX", ua_regex) + set_key(str(env_path), "VT_API_KEY", vt_key) + set_key(str(env_path), "OTX_API_KEY", otx_key) + set_key(str(env_path), "GREYNOISE_API_KEY", gn_key) + set_key(str(env_path), "IPINFO_TOKEN", ipinfo) + set_key(str(env_path), "OFFLINE_IP_BLOCKLIST", off_block) + st.success(".env saved ✔") + +if run_btn: + st.session_state["_tail_pos"] = 0 # reset tail to show fresh lines + cmd = [ + "python", "-m", "src.cli", inp, + "--out", out_dir, + "--verbose", verbose, + "--color", color, + "--format", 
fmt, + "--llm-group-by", llm_group_by, + ] + if no_llm: + cmd.append("--no-llm") + if summary: + cmd.append("--summary") + if preview and int(preview) > 0: + cmd.extend(["--preview", str(int(preview))]) + if limit and int(limit) > 0: + cmd.extend(["--limit", str(int(limit))]) + if int(llm_sample) >= 0: + cmd.extend(["--llm-sample", str(int(llm_sample))]) + if int(group_window) > 0: + cmd.extend(["--group-window", str(int(group_window))]) + if int(gate_4xx) > 0: + cmd.extend(["--llm-gate-4xx", str(int(gate_4xx))]) + if gate_ua: + cmd.append("--llm-gate-ua") + cmd.extend(["--cti-scope", cti_scope]) + if int(cti_max) >= 0: + cmd.extend(["--cti-max", str(int(cti_max))]) + if int(cti_batch_size) > 0: + cmd.extend(["--cti-batch-size", str(int(cti_batch_size))]) + if float(cti_batch_pause) > 0: + cmd.extend(["--cti-batch-pause", str(float(cti_batch_pause))]) + if ai_mal: + cmd.append("--ai-malicious-report") + + st.info("Running pipeline... logs will stream below.") + log_box = st.empty() + log_lines = [] + for ln in run_cli_stream(cmd): + log_lines.append(ln) + # Keep only last few hundred lines for performance + log_box.code("\n".join(log_lines[-400:]), language="bash") + st.success("Pipeline finished. 
Refresh the table if needed.") + +df = pd.DataFrame() +if uploaded is not None: + df = pd.DataFrame([json.loads(l) for l in uploaded.getvalue().decode("utf-8").splitlines() if l.strip()]) +elif selected: + # Use tailing for scalability and near real-time updates + file_path = base_path / selected + if "_tail_pos" not in st.session_state or st.session_state.get("_tail_file") != str(file_path): + st.session_state["_tail_pos"] = 0 + st.session_state["_tail_file"] = str(file_path) + new_rows, new_pos = tail_jsonl(file_path, st.session_state["_tail_pos"], max_lines=5000) + st.session_state["_tail_pos"] = new_pos + df = pd.DataFrame(new_rows) if new_rows else load_jsonl(file_path) + +if df.empty: + st.info("Select or upload an enriched JSONL file to explore results.") + st.stop() + +# Metrics +total_requests = len(df) +unique_ips = df["ip"].nunique() if "ip" in df.columns else 0 +status_counts = df["status"].astype(str).value_counts() if "status" in df.columns else pd.Series(dtype=int) + +with col1: + m1, m2, m3 = st.columns(3) + m1.metric("Total requests", f"{total_requests}") + m2.metric("Unique IPs", f"{unique_ips}") + if not status_counts.empty: + m3.metric("Top status", f"{status_counts.index[0]}: {int(status_counts.iloc[0])}") + +st.subheader("Status distribution") +if not status_counts.empty: + st.bar_chart(status_counts) +else: + st.write("No status data available.") + +# Latest enriched events with colored severity badges +st.subheader("Latest enriched events (tail)") +df_display = df.copy() +df_display["severity_badge"] = [severity_badge(row) for row in df_display.to_dict(orient="records")] +cols = [c for c in ["ip", "severity_badge", "status", "path", "ua", "rationale"] if c in df_display.columns] +st.dataframe(df_display[cols].tail(200), use_container_width=True) + +# Aggregate suspicious overview from records if they contain CTI annotations +cti_cols = [ + "ip", + "risk", + "abuse_confidence_score", + "total_reports", + "country", + "talos_reputation", + 
"vt_malicious", + "vt_suspicious", +] +present_cti = [c for c in cti_cols if c in df.columns] +if present_cti: + st.subheader("CTI Signals (per record view)") + st.dataframe(df[present_cti].dropna(how="all").tail(300), use_container_width=True) + +st.caption("Tip: generate enriched JSONL via `python -m src.cli --out data/processed`. The dashboard will auto-refresh.")