diff --git a/.gitignore b/.gitignore index 968643b..5c7a02a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,41 +1,45 @@ -# Environments -.venv/ -env/ -.env -.env.* - -# Python -__pycache__/ -*.pyc - -# Data -data/raw/* -!data/raw/.gitkeep -data/processed/* -!data/processed/.gitkeep - # Cache +.coverage +coverage.xml +# Data +data/cache/ data/cache/* !data/cache/.gitkeep - -# Tool caches / reports -.pytest_cache/ -.mypy_cache/ -.ruff_cache/ +# Data caches and raw logs +data/processed/* +!data/processed/.gitkeep +data/raw/ +data/raw/* +!data/raw/.gitkeep +.debug/* +docs/explanation_of_project.mp4 +.DS_Store +*.env +*.env.* +.env +.env.* +env/ +# Environments +.env.local htmlcov/ -.coverage -coverage.xml - -# Notebooks *.ipynb_checkpoints/ - +# Large binaries +# Local env +*.mov +*.mp4 +.mypy_cache/ +# Notebooks +notebooks/**/*.ipynb_checkpoints/ +# Notebooks outputs # OS -.DS_Store -Thumbs.db - -.coverage +*.pyc +__pycache__/ .pytest_cache/ -.mypy_cache/ +# Python .ruff_cache/ -htmlcov/ -coverage.xml +*.tar +*.tar.gz +Thumbs.db +# Tool caches / reports +.venv/ +*.zip diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..570d8c0 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "workbench.colorCustomizations": { + "terminal.background": "#00000000", + "minimap.background": "#00000000", + "scrollbar.shadow": "#00000000" + } +} \ No newline at end of file diff --git a/AGENTS.md b/AGENTS.md index 103eb79..8c5de1c 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -35,3 +35,51 @@ - Anonymize or truncate sensitive log data before committing. - Large files: store raw datasets outside git or via LFS; keep only small, representative fixtures. +--- + +## Scalable, Budget‑Aware Processing (Project‑Specific) + +Principles + +- Offline‑first: parse, score, and report without network by default; add CTI/LLM only on the smallest, most informative subset. 
+- Aggregate, then sample: enrich clusters (IP/time/window/signature), not individual lines. +- Cache and dedupe: never ask the network twice for the same thing. +- Budget‑aware: throttle LLM and CTI to a daily budget and degrade gracefully. + +LLM Strategy + +- Grouping: enrich per group, not per line. Default `--llm-group-by ip`; for more precision use `signature` (`ip+path+status+ua`). Optional `--group-window` adds a time bucket. +- Sampling: cap calls with `--llm-sample N` (default 200). Non‑sampled groups are marked `severity=unknown` with a clear rationale. +- Gate before LLM: `--llm-gate-4xx N`, `--llm-gate-ua` so only interesting groups hit the LLM. +- Map–reduce summaries: optionally ask the LLM only for the top‑K groups (via sampling/gates) instead of all events. +- Budget throttle: set `GROQ_TOKENS_BUDGET`; enrichment stops before the cap and continues offline. + +CTI Strategy + +- Suspicious‑first: `--cti-scope suspicious` (default) and `--cti-max 100–200`. +- Strong cache: `data/cache/cti_cache.json` stores results; cache entries currently never expire (an optional TTL may be added in the future). +- Defer VT/API: query VirusTotal only for the final shortlist; continue gracefully if rate‑limited. +- Batch/Resilience: lookups are capped and cached incrementally; re‑runs reuse the cache to resume. +- Offline lists: set `OFFLINE_IP_BLOCKLIST` to escalate known‑bad IPs without CTI calls. + +Pipeline Shape + +- Stage 1 (Parse): JSONL output with stable fields; chunk by time window for massive files. +- Stage 2 (Score): per‑IP stats, 4xx ratios, UA flags; produce candidate groups. +- Stage 3 (CTI): shortlist only (top K by 4xx/requests/UA), cached. +- Stage 4 (LLM): grouped + sampled enrichment, budget‑throttled. +- Stage 5 (Reports): deterministic, reproducible, works even with no LLM/CTI. 
+ +Recommended Commands + +- Huge logs, minimal requests: + - `python -m src.cli data/raw/big.log --out data/processed --llm-group-by ip --llm-sample 200 --cti-scope suspicious --cti-max 200 --color never` +- Strictly offline (fastest): + - `python -m src.cli data/raw/big.log --out data/processed --no-llm --no-cti --no-reports` +- Budgeted runs: + - `export GROQ_TOKENS_BUDGET=150000` then run the first command. + +Next Enhancements + +- Time‑window grouping (`--group-window`) implemented; consider adaptive windows per IP for very bursty traffic. +- Add token budget accounting by model/tokenizer if needed; current approach is length‑based and conservative. diff --git a/README.md b/README.md new file mode 100644 index 0000000..3db6468 --- /dev/null +++ b/README.md @@ -0,0 +1,114 @@ +# LogCTIAI — Offline‑First Log Analysis + CTI (LLM‑Optional) + +Bu layihə (AZ): böyük həcmli server/web loglarını emal edir, qruplaşdırılmış LLM şərhləri (istəyə görə) və CTI zənginləşdirməsi ilə təhlükə siqnallarını çıxarır, nəticədə yığcam və təkrarlana bilən hesabatlar yaradır. Şəbəkədən minimal istifadə və büdcə nəzarəti üçün optimallaşdırılıb. + +This project ingests large web/server logs, enriches events with optional LLM analysis, performs CTI lookups against external sources, and generates concise human‑readable reports. It is designed to run reliably on very large datasets with minimal network usage: + +- Auto‑detects `.txt` vs `.log` inputs; parses recognized log lines in `.txt` files. +- Minimizes LLM calls via grouping, sampling, and gates; enforces an optional token budget. +- Minimizes CTI calls via suspicious‑first scoping, caps, batching, and strong caching. +- Works fully offline and degrades gracefully when network or budgets are unavailable. + +See `docs/USAGE.md` for practical commands and tips. See `AGENTS.md` for project conventions and the scalable processing strategy. 
+ +![Mindmap](docs/ProjectMindmapv0.5.png) + +## Quickstart + +- Create env: `python -m venv .venv && source .venv/bin/activate` +- Install deps: `pip install -r requirements.txt` +- Run on a log (auto‑detects `.txt` files that look like logs): + - `python -m src.cli data/raw/access_log.txt --out data/processed --summary --preview 3` + - Outputs `data/processed/access_log.jsonl` and `data/processed/reports/` with `.txt` and `.md`. + +If LLM keys are not configured, enrichment runs offline with `severity=unknown` placeholders and continues to produce reports. + +## CLI Overview + +`python -m src.cli INPUT --out OUT_DIR [options]` + +Common options: + +- `--no-llm`: disable LLM enrichment (default if no keys set). +- `--no-cti`: skip CTI lookups; run fully offline. +- `--no-reports`: skip generating text/markdown reports. +- `--limit N`: process only the first N lines. +- `--format jsonl|csv`: output for enriched events (default: `jsonl`). +- `--color auto|always|never`: terminal color policy. +- `--ai-malicious-report`: after CTI summarization, ask the LLM for a detailed malicious-activity report (saved under `reports/`). + +LLM request control: + +- `--llm-group-by none|ip|signature`: group before LLM calls (default: `ip`); `signature` groups by `ip+path+status+ua`. +- `--group-window SECONDS`: add a time bucket to grouping (e.g., `60`). +- `--llm-sample N`: send only N groups to LLM; the rest are annotated as sampled/gated out (default: `200`). +- `--llm-gate-4xx N`: only send groups with ≥N 4xx responses. +- `--llm-gate-ua`: only send groups with suspicious user‑agents. + +CTI request control: + +- `--cti-scope suspicious|all`: look up only suspicious IPs (default) or all IPs. +- `--cti-max N`: cap number of IPs to query for CTI (0=unlimited; default: `100`). +- `--cti-batch-size N`, `--cti-batch-pause S`: batch CTI queries and pause between batches; cache flushes periodically. 
+ +Examples (large logs): + +- Minimal network usage: + - `python -m src.cli data/raw/big.log --out data/processed --llm-group-by ip --group-window 60 --llm-gate-4xx 5 --llm-sample 200 --cti-scope suspicious --cti-max 200` +- Strictly offline (fastest): + - `python -m src.cli data/raw/big.log --out data/processed --no-llm --no-cti --no-reports` + +## Environment + +Create a `.env` file (see variables below). Keys are optional; the tool runs offline without them. + +- `GROQ_API_KEYS`: comma‑separated LLM keys for rotation. +- `GROQ_MODEL`: Groq model name (default `llama3-8b-8192`). +- `GROQ_TOKENS_BUDGET`: approximate token budget per run/day; enrichment stops before the cap and continues offline. +- `RISK_4XX_THRESHOLD`: per‑IP 4xx threshold to consider suspicious in reports (default `5`). +- `SUSPICIOUS_UA_REGEX`: comma‑separated regex patterns to flag suspicious UAs. +- `VT_API_KEY`: VirusTotal API key (optional; CTI works in a degraded mode without it). +- `OFFLINE_IP_BLOCKLIST`: path to a newline‑separated list of known‑bad IPs to escalate risk without CTI calls. + +## Outputs + +- Enriched events: `data/processed/NAME.jsonl` (or `NAME.csv` with `--format csv`), where `NAME` is the input file's base name (e.g., `access_log.txt` produces `access_log.jsonl`). +- Reports: `data/processed/reports/report.txt` and `report.md` summarizing activity and suspicious IPs; may include a brief AI note if LLM is enabled. +- Malicious AI report (optional): `data/processed/reports/malicious_ai_report.txt|md` if `--ai-malicious-report` is used and malicious CTI signals are present. +- CTI cache: `data/cache/cti_cache.json` (auto‑created and reused to minimize network calls). + +## Testing + +- Run tests: `pytest -q` +- Optional coverage: `pytest --cov=src -q` (if the coverage plugin is installed). + +Notes: +- If you used the local venv above, run tests via `.venv/bin/pytest -q`. +- A PyPDF2 deprecation warning may appear; it’s harmless and can be ignored. + +## UI Dashboard + +An optional Streamlit dashboard is included for exploration and client-friendly viewing. 
+ +- Install UI deps (already part of `requirements.txt`). +- Run the UI: `scripts/run_ui.sh` (or `streamlit run ui/app.py`). +- Select an enriched `.jsonl` file from `data/processed/` or upload one. +- View status distribution, sample enriched events, and CTI attributes. + +## Troubleshooting + +- `.txt` auto‑detection: the CLI reads a small sample and parses with `parse_line`. If none match, the file is copied as plain text rather than parsed as logs. +- LLM budget exceeded: you’ll see `LLM budget exhausted` in logs; records are still produced with `severity=unknown` and a rationale explaining sampling/gating. +- CTI failures: the pipeline continues with cached/partial data; use `--no-cti` for fully offline runs. Consider `--cti-max` and batching to avoid rate limits. +- No colors or CI: pass `--color never` for consistent, plain output. + +## Docs + +- Usage guide with more examples: `docs/USAGE.md` +- Principles, strategy, and repo conventions: `AGENTS.md` +- Mindmap/diagram: `docs/ProjectMindmapv0.5.png` +- Project write‑ups: `docs/Final Project - Log Analysis + CTI.pdf` + +--- + +Made with a focus on reliability, scalability, and cost‑awareness. 
diff --git a/data/processed-test/access_log.jsonl b/data/processed-test/access_log.jsonl new file mode 100644 index 0000000..90a2dfd --- /dev/null +++ b/data/processed-test/access_log.jsonl @@ -0,0 +1,185 @@ +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/fan_facemask.jpg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/melon_bike.jpeg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/lemon_juice.jpg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/permafrost.jpg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/green_smoothie.jpg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": 
"18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/fruit_press.jpg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/eggfruit_juice.jpg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/carrot_juice.jpeg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/artwork2.jpg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/socket.io/?EIO=4&transport=polling&t=PZvmEN_&sid=dCq-gfyMWN1lgOZ1AAAC", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/banana_juice.jpg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, 
"ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/apple_pressings.jpg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/products/apple_juice.jpg", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/socket.io/?EIO=4&transport=polling&t=PZvmELD&sid=dCq-gfyMWN1lgOZ1AAAC", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/api/Quantitys/", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/rest/products/search?q=", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": 
"/api/Challenges/?name=Score%20Board", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/api/Challenges/?name=Score%20Board", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/rest/languages", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/socket.io/?EIO=4&transport=polling&t=PZvmEGF&sid=dCq-gfyMWN1lgOZ1AAAC", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "POST", "path": "/socket.io/?EIO=4&transport=polling&t=PZvmEG8&sid=dCq-gfyMWN1lgOZ1AAAC", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/rest/admin/application-configuration", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": 
["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/assets/public/images/JuiceShop_Logo.png", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:45+00:00", "method": "GET", "path": "/rest/admin/application-configuration", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:44+00:00", "method": "GET", "path": "/rest/admin/application-version", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:44+00:00", "method": "GET", "path": "/rest/admin/application-configuration", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:44+00:00", "method": "GET", "path": "/rest/admin/application-version", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:44+00:00", "method": "GET", "path": "/socket.io/?EIO=4&transport=polling&t=PZvmECK", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; 
Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:44+00:00", "method": "GET", "path": "/assets/i18n/en.json", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:44+00:00", "method": "GET", "path": "/styles.css", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:44+00:00", "method": "GET", "path": "/main.js", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:44+00:00", "method": "GET", "path": "/vendor.js", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:44+00:00", "method": "GET", "path": "/polyfills.js", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:44+00:00", "method": "GET", "path": "/runtime.js", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": 
"unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/", "proto": "TLSv1.3", "status": 304, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/socket.io/?EIO=4&transport=websocket&sid=JMIIi7MdmkRu-2SAAAAA", "proto": "TLSv1.3", "status": 101, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/fan_facemask.jpg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/melon_bike.jpeg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/lemon_juice.jpg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/permafrost.jpg", "proto": "TLSv1.3", "status": 200, "size": 
null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/green_smoothie.jpg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/fruit_press.jpg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/eggfruit_juice.jpg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/carrot_juice.jpeg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/banana_juice.jpg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": 
"2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/artwork2.jpg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/apple_pressings.jpg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/assets/public/images/products/apple_juice.jpg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/socket.io/?EIO=4&transport=polling&t=PZvmDs7&sid=JMIIi7MdmkRu-2SAAAAA", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/api/Quantitys/", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/rest/products/search?q=", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", 
"severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/api/Challenges/?name=Score%20Board", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/api/Challenges/?name=Score%20Board", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/MaterialIcons-Regular.woff2", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/socket.io/?EIO=4&transport=polling&t=PZvmDh-&sid=JMIIi7MdmkRu-2SAAAAA", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "POST", "path": "/socket.io/?EIO=4&transport=polling&t=PZvmDhz&sid=JMIIi7MdmkRu-2SAAAAA", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:43+00:00", "method": "GET", "path": "/rest/languages", "proto": "TLSv1.3", 
"status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:42+00:00", "method": "GET", "path": "/assets/public/images/JuiceShop_Logo.png", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:42+00:00", "method": "GET", "path": "/rest/admin/application-configuration", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:42+00:00", "method": "GET", "path": "/rest/admin/application-configuration", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:42+00:00", "method": "GET", "path": "/rest/admin/application-version", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:42+00:00", "method": "GET", "path": "/rest/admin/application-configuration", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:42+00:00", 
"method": "GET", "path": "/rest/admin/application-version", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:41+00:00", "method": "GET", "path": "/assets/i18n/en.json", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:41+00:00", "method": "GET", "path": "/socket.io/?EIO=4&transport=polling&t=PZvmDQz", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:41+00:00", "method": "GET", "path": "/assets/public/favicon_js.ico", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:40+00:00", "method": "GET", "path": "/styles.css", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:40+00:00", "method": "GET", "path": "/vendor.js", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": 
"2025-08-30T06:53:40+00:00", "method": "GET", "path": "/main.js", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:40+00:00", "method": "GET", "path": "/polyfills.js", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:40+00:00", "method": "GET", "path": "/runtime.js", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-30T06:53:39+00:00", "method": "GET", "path": "/", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:16+00:00", "method": "INDEX", "path": "/", "proto": "", "status": 301, "size": null, "ref": null, "ua": "nikto", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:16+00:00", "method": "", "path": "", "proto": "", "status": 400, "size": null, "ref": null, "ua": null, "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:15+00:00", "method": "TRACK", "path": "/", "proto": "", "status": 301, "size": null, "ref": null, "ua": "nikto", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": 
"2025-08-29T12:38:15+00:00", "method": "TRACK", "path": "/", "proto": "", "status": 301, "size": null, "ref": null, "ua": "nikto", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:15+00:00", "method": "TRACE", "path": "/", "proto": "", "status": 405, "size": null, "ref": null, "ua": "nikto", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:15+00:00", "method": "TRACE", "path": "/", "proto": "", "status": 405, "size": null, "ref": null, "ua": "nikto", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:15+00:00", "method": "PROPFIND", "path": "/", "proto": "", "status": 400, "size": null, "ref": null, "ua": "nikto", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:15+00:00", "method": "DEBUG", "path": "/", "proto": "", "status": 301, "size": null, "ref": null, "ua": "nikto", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:15+00:00", "method": "NVFYPLCD", "path": "/", "proto": "", "status": 301, "size": null, "ref": null, "ua": "nikto", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:15+00:00", "method": "OPTIONS", "path": "/", "proto": "", "status": 301, "size": null, "ref": null, "ua": "nikto", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:15+00:00", "method": "", "path": "", "proto": "", "status": 400, "size": null, "ref": null, "ua": null, "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:15+00:00", "method": "", "path": "", "proto": "", 
"status": 400, "size": null, "ref": null, "ua": null, "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:38:15+00:00", "method": "PUT", "path": "/nikto-test-dfmcL5fa.html", "proto": "", "status": 301, "size": null, "ref": null, "ua": "nikto", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/group", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/grid", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/greybox", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/green", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/Graphics", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/graphics", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", 
"method": "GET", "path": "/graph", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/grants", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/granted", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/grant", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/grafik", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gracias", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gr", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gps", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": 
"18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gprs", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gpl", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gpapp", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gp", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/government", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/goto", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/googlebot", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/google_sitemap", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": 
["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/google", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/goods_script", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/goods", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gone", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/golf", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gold", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/goaway", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/go", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": 
"dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/glossary", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/globes_admin", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/globals", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/globalnav", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/global.asax", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/global.asa", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/Global", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": 
"/global", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/glimpse", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/glance_config", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gl", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gitweb", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/git", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/gifts", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/giftregs", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": 
"2025-08-29T12:36:27+00:00", "method": "GET", "path": "/giftreg_manage", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:27+00:00", "method": "GET", "path": "/giftoptions", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/giftcert", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gift", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gifs", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gif", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gid", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gg", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], 
"rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gfx", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gfen", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gettxt", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/getout", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/getjobid", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/getFile.cfm", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/get-file", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/getfile", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": 
"dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/getconfig", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/getaccess", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/get_file", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/get", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gestione", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gestion", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gest", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/geronimo", "proto": 
"TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/german", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/geoip", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/geo", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gentoo", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/generic", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/generator", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/generateditems", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": 
"2025-08-29T12:36:26+00:00", "method": "GET", "path": "/general", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gen", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/geeklog", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gdform", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gccallback", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gbook", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gb", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gateway", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": 
"LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gate", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/garbage", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/ganglia", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gaming", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/Games", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/games", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gamercard", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/game", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": 
"unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gallery2", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gallery", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/galleries", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/galerie", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/galeria", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gaestebuch", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gadgets", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} +{"ip": "18.237.3.202", "time": "2025-08-29T12:36:26+00:00", "method": "GET", "path": "/gadget", "proto": "TLSv1.3", "status": 
200, "size": null, "ref": null, "ua": "dirbuster", "severity": "unknown", "iocs": ["18.237.3.202"], "rationale": "LLM disabled"} diff --git a/data/processed-test/new_log.jsonl b/data/processed-test/new_log.jsonl new file mode 100644 index 0000000..90debed --- /dev/null +++ b/data/processed-test/new_log.jsonl @@ -0,0 +1,15 @@ +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "", "status": 301, "size": null, "ref": null, "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:93.0) Gecko/20100101 Firefox/91.0", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "", "status": 301, "size": null, "ref": null, "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:93.0) Gecko/20100101 Firefox/91.0", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (iPhone; CPU iPhone OS 8_0_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A366 
Safari/600.1.4", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "", "status": 301, "size": null, "ref": null, "ua": "Mozilla/5.0 (iPhone; CPU iPhone OS 8_0_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A366 Safari/600.1.4", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (iPhone; U; CPU iPhone OS 3_0 like Mac OS X; en-us) AppleWebKit/528.18 (KHTML, like Gecko) Version/4.0 Mobile/7A341 Safari/528.16", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "", "status": 301, "size": null, "ref": null, "ua": "Mozilla/5.0 (iPhone; U; CPU iPhone OS 3_0 like Mac OS X; en-us) AppleWebKit/528.18 (KHTML, like Gecko) Version/4.0 Mobile/7A341 Safari/528.16", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "", "status": 301, "size": null, "ref": null, "ua": "Mozilla/5.0 (compatible; Yahoo! 
Slurp; http://help.yahoo.com/help/us/ysearch/slurp)", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "msnbot/1.1 (+http://search.msn.com/msnbot.htm)", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/robots.txt", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/robots.txt", "proto": "", "status": 301, "size": null, "ref": null, "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/sitemap.xml", "proto": "", "status": 301, "size": null, "ref": null, "ua": "msnbot/1.1 (+http://search.msn.com/msnbot.htm)", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} +{"ip": "5.135.75.243", "time": "2025-09-04T06:22:10+00:00", "method": "GET", "path": "/robots.txt", "proto": "TLSv1.3", "status": 200, "size": null, "ref": null, "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:93.0) Gecko/20100101 Firefox/91.0", "severity": "unknown", "iocs": ["5.135.75.243"], "rationale": "LLM disabled"} diff --git a/docs/ProjectMindmapv0.5.png b/docs/ProjectMindmapv0.5.png new file mode 100644 index 0000000..f7e89e1 Binary files /dev/null and b/docs/ProjectMindmapv0.5.png differ diff --git a/docs/USAGE.md b/docs/USAGE.md 
index 0eee43f..2b683ba 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -5,6 +5,7 @@ Usage - Basic run (log -> enriched events + reports): - `python -m src.cli data/raw/access_log.txt --out data/processed --summary --preview 3` - Adds `data/processed/access_log.jsonl` and `data/processed/reports/report.txt|md`. + - Any `.log` file is treated as a log. `.txt` files are auto-detected: if they contain recognizable log lines, they are parsed as logs; otherwise they are copied as plain text. Example: `python -m src.cli data/raw/new_log.txt --out data/processed`. - Options: - `--no-llm`: disable LLM enrichment (default if no GROQ keys). - `--no-cti`: disable CTI lookups (scraping/API); runs offline. @@ -12,12 +13,25 @@ Usage - `--limit N`: process only the first N lines for quick tests. - `--format jsonl|csv`: choose output for enriched events. - `--color auto|always|never`: terminal color policy. + - `--llm-group-by none|ip|signature`: group records before LLM calls to reduce requests. `ip` groups by source IP (minimal requests). `signature` groups by `ip+path+status+ua`. + - `--llm-sample N`: only send N groups to the LLM; non-selected groups are annotated as `severity=unknown` with rationale `LLM sampled out`. + - `--llm-gate-4xx N`: only send groups with at least N 4xx responses. + - `--llm-gate-ua`: only send groups with suspicious user-agents. + - `--group-window SECONDS`: add a time window bucket to grouping to compress bursts (e.g., `60`). + - `--cti-scope suspicious|all`: look up CTI for only suspicious IPs (based on 4xx and UA) or all IPs. + - `--cti-max N`: maximum number of IPs to query for CTI (0=unlimited). + - `--cti-batch-size N` and `--cti-batch-pause S`: periodically flush cache and pause S seconds between CTI batches. Environment - Copy `.env.example` to `.env` and set: - `GROQ_API_KEYS` for LLM enrichment (comma-separated supported). - `GROQ_MODEL` if you want to change the default. 
+ - Optional CTI provider keys: + - `VT_API_KEY` (VirusTotal IP lookups) + - `OTX_API_KEY` (AlienVault OTX pulses) + - `GREYNOISE_API_KEY` (GreyNoise community/enterprise) + - `IPINFO_TOKEN` (org/geo enrichment) Testing @@ -26,6 +40,21 @@ Testing Notes -- CTI lookups use AbuseIPDB public site scraping as a baseline. In offline or restricted environments, the tool continues without CTI data. +- CTI lookups use AbuseIPDB and Talos by default, and will also use VirusTotal, OTX, GreyNoise, ThreatFox, and IPInfo when keys/network are available. In offline or restricted environments, the tool continues without CTI data. - Reports summarize overall activity, surface suspicious IPs (CTI risk, 4xx rate, UA flags), and include an optional brief AI anomaly insight when LLM is enabled. +Performance tips + +- To avoid rate limits on large logs, prefer `--llm-group-by ip --group-window 60 --llm-gate-4xx 5 --llm-sample 200 --cti-scope suspicious --cti-max 200`. +- For the fastest, fully offline runs, use `--no-llm --no-cti --no-reports`. + +Environment variables + +- `GROQ_TOKENS_BUDGET`: approximate daily token budget for LLM calls. When reached, enrichment gracefully degrades and continues offline. +- `OFFLINE_IP_BLOCKLIST`: path to a newline-separated list of IPs to treat as high risk without CTI calls. + +Dashboard + +- Install UI deps: `pip install -r requirements.txt` +- Run: `streamlit run ui/app.py` +- Select the latest file in `data/processed/` and keep the auto-refresh enabled for near real-time updates while the CLI processes logs. 
diff --git a/docs/wiki/CLI.md b/docs/wiki/CLI.md new file mode 100644 index 0000000..eecd966 --- /dev/null +++ b/docs/wiki/CLI.md @@ -0,0 +1,26 @@ +# CLI Reference + +`python -m src.cli <path> --out <dir> [options]` + +Core +- `--format {jsonl,csv}` +- `--summary`, `--preview N` +- `--no-llm`, `--no-cti`, `--no-reports` +- `--limit N`, `--color {auto,always,never}` + +LLM +- `--llm-group-by {none,ip,signature}` +- `--group-window SECONDS` +- `--llm-sample N`, `--llm-gate-4xx N`, `--llm-gate-ua` + +CTI +- `--cti-scope {suspicious,all}` +- `--cti-max N`, `--cti-batch-size N`, `--cti-batch-pause S` + +Examples +```bash +python -m src.cli data/raw/access.log --out data/processed --summary --preview 5 +python -m src.cli data/raw/big.log --out data/processed \ + --llm-group-by signature --llm-sample 100 --cti-max 100 --summary +``` + diff --git a/docs/wiki/CTI_and_LLM_Strategy.md b/docs/wiki/CTI_and_LLM_Strategy.md new file mode 100644 index 0000000..04add31 --- /dev/null +++ b/docs/wiki/CTI_and_LLM_Strategy.md @@ -0,0 +1,20 @@ +# CTI + LLM Strategy + +Principles +- Offline‑first: deterministic outputs without network +- Group then sample to minimize LLM calls +- Gates on 4xx and suspicious UA +- Strong CTI cache; batch + pause for resilience +- Budget throttle via `GROQ_TOKENS_BUDGET` + +LLM +- Grouping: `ip` or `signature` (ip+path+status+ua) +- Sampling: `--llm-sample N` (default 200) +- Gates: `--llm-gate-4xx N`, `--llm-gate-ua` + +CTI +- Scope: `--cti-scope suspicious` (default) or `all` +- Caps: `--cti-max`, batching and pause +- Cache: `data/cache/cti_cache.json` reused across runs +- VT/API: defer to shortlist; fail soft when rate‑limited + diff --git a/docs/wiki/Development.md b/docs/wiki/Development.md new file mode 100644 index 0000000..abfd410 --- /dev/null +++ b/docs/wiki/Development.md @@ -0,0 +1,19 @@ +# Development + +Structure +- `src/`: parsers, enrichers (LLM/CTI), CLI +- `tests/`: pytest suite (80%+ target), fixtures under `tests/fixtures/` +- `docs/`: usage and 
diagrams +- `notebooks/`: exploratory analysis (clear outputs) +- `data/raw/`: raw logs (keep large datasets out of git) + +Commands +- Tests: `pytest -q` or `pytest --cov=src -q` +- Lint/format: `ruff check . && ruff format .` +- Types: `mypy src` + +Contributing +- Conventional Commits; small PRs preferred (~≤300 LOC) +- No secrets: use `.env`, provide `.env.example` +- Add fixtures for new parsers; mock network in tests + diff --git a/docs/wiki/FAQ.md b/docs/wiki/FAQ.md new file mode 100644 index 0000000..05f33d7 --- /dev/null +++ b/docs/wiki/FAQ.md @@ -0,0 +1,14 @@ +# FAQ + +Q: Can I run fully offline? +A: Yes — use `--no-llm --no-cti`. Reports remain reproducible; severity is marked `unknown` with rationale. + +Q: How to avoid rate limits? +A: Use `--cti-max`, batching flags, and rely on the cache. Prefer grouping + sampling for LLM. + +Q: Why are some groups missing LLM notes? +A: They were gated/sampled out or the budget was reached. + +Q: Where are results stored? +A: `data/processed/` and `data/processed/reports/`. + diff --git a/docs/wiki/Home.md b/docs/wiki/Home.md new file mode 100644 index 0000000..d2bde44 --- /dev/null +++ b/docs/wiki/Home.md @@ -0,0 +1,17 @@ +# LogCTIAI Wiki + +Welcome to the LogCTIAI wiki. This project analyzes large web/server logs, enriches them with optional LLM reasoning, and performs efficient CTI lookups — all designed to run offline‑first and on a budget. 
+ +- Quickstart: installation and first run +- CLI: all flags and examples +- CTI + LLM Strategy: grouping, sampling, gates, cache, budgets +- Development: repo structure, testing, style +- FAQ: common questions and troubleshooting + +![Mindmap](../ProjectMindmapv0.5.png) + +Useful links: +- README (repo root) +- Usage guide: docs/USAGE.md +- Streamlit UI: ui/ + diff --git a/docs/wiki/Quickstart.md b/docs/wiki/Quickstart.md new file mode 100644 index 0000000..6dca8b9 --- /dev/null +++ b/docs/wiki/Quickstart.md @@ -0,0 +1,18 @@ +# Quickstart + +Create env +- `python -m venv .venv && source .venv/bin/activate` +- `pip install -r requirements.txt` + +Offline run +- `python -m src.cli data/raw/big.log --out data/processed --no-llm --no-cti --no-reports` + +Budgeted run +- `export GROQ_TOKENS_BUDGET=150000` +- `python -m src.cli data/raw/big.log --out data/processed --llm-group-by ip --llm-sample 200 --cti-scope suspicious --cti-max 200 --color never` + +Outputs +- `data/processed/*.jsonl|csv` +- `data/processed/reports/*.{txt,md}` +- `data/cache/cti_cache.json` + diff --git a/requirements.txt b/requirements.txt index 5ef8a99..4b326e8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,7 @@ pytest-cov>=5.0.0 rich>=13.7.1 uvloop; platform_system != 'Windows' markdown>=3.6 +streamlit>=1.34.0 +pandas>=2.2.2 +altair>=5.3.0 +streamlit-autorefresh>=0.1.1 diff --git a/scripts/publish_wiki.sh b/scripts/publish_wiki.sh new file mode 100755 index 0000000..2072b31 --- /dev/null +++ b/scripts/publish_wiki.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +set -euo pipefail + +REPO_URL=$(git remote get-url origin) +OWNER_REPO=${REPO_URL#https://github.com/} +OWNER_REPO=${OWNER_REPO%.git} + +WIKI_URL="https://github.com/${OWNER_REPO}.wiki.git" +WORKDIR=$(mktemp -d) +trap 'rm -rf "$WORKDIR"' EXIT + +echo "Cloning wiki: $WIKI_URL" +if ! git clone "$WIKI_URL" "$WORKDIR"; then + echo "Error: Wiki repository not found. Ensure the repo's Wiki is enabled and that you have push access." 
>&2 + echo "You can enable Wiki with: gh repo edit ${OWNER_REPO} --enable-wiki" >&2 + exit 1 +fi + +rsync -a --delete docs/wiki/ "$WORKDIR"/ +cd "$WORKDIR" +git add . +if git diff --cached --quiet; then + echo "No wiki changes to publish." + exit 0 +fi +git commit -m "wiki: sync from docs/wiki" +git push origin HEAD +echo "Wiki published: https://github.com/${OWNER_REPO}/wiki" + diff --git a/scripts/run_ui.sh b/scripts/run_ui.sh new file mode 100755 index 0000000..54af48a --- /dev/null +++ b/scripts/run_ui.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [ -d .venv ]; then + . .venv/bin/activate +fi + +exec streamlit run ui/app.py + diff --git a/src/cli.py b/src/cli.py index f6b94e1..80db62d 100644 --- a/src/cli.py +++ b/src/cli.py @@ -16,12 +16,17 @@ from rich.traceback import install as rich_traceback_install from .parsers.text_extractor import extract_text_from_pdf, read_text_file -from .parsers.log_parser import parse_lines +from .parsers.log_parser import parse_lines, parse_line from .enrichers.llm_enricher import enrich_log_records from .enrichers.cti_service import cti_for_ips from .parsers.ua_analysis import detect_suspicious_user_agent -from .reports.report_builder import build_text_report, build_markdown_report +from .reports.report_builder import ( + build_text_report, + build_markdown_report, + build_malicious_ai_report, +) from .config import get_settings +from .groq_client import GroqRotatingClient rich_traceback_install(show_locals=False) @@ -36,6 +41,17 @@ def process_log( out_format: str = "jsonl", with_cti: bool = True, build_reports: bool = True, + *, + llm_sample: int | None = None, + llm_group_by: list[str] | None = None, + group_window_sec: int | None = None, + llm_gate_min_4xx: int | None = None, + llm_gate_ua: bool = False, + cti_scope: str = "suspicious", + cti_max: int | None = None, + cti_batch_size: int | None = None, + cti_batch_pause: float = 0.0, + ai_malicious_report: bool = False, ) -> Path: console.rule("[bold 
cyan]🔎 Parsing Log") console.log(f"Parsing log: [bold]{path}") @@ -44,7 +60,15 @@ def process_log( lines = lines[:limit] records = [r.to_dict() for r in parse_lines(lines)] console.log(f"Parsed [bold green]{len(records)}[/] records") - enriched = enrich_log_records(records, use_llm=use_llm) + enriched = enrich_log_records( + records, + use_llm=use_llm, + llm_sample=llm_sample, + group_by=llm_group_by, + group_window_sec=group_window_sec, + llm_gate_min_4xx=llm_gate_min_4xx, + llm_gate_ua=llm_gate_ua, + ) out_dir.mkdir(parents=True, exist_ok=True) if out_format == "csv": out_path = out_dir / f"{path.stem}.csv" @@ -66,6 +90,10 @@ def process_log( enriched_records=enriched, use_llm=use_llm, with_cti=with_cti, + cti_scope=cti_scope, + cti_max=cti_max, + cti_batch_size=cti_batch_size, + cti_batch_pause=cti_batch_pause, ) reports_dir = out_dir / "reports" txt_path = build_text_report( @@ -82,6 +110,63 @@ def process_log( ) console.log(f"Reports saved: [bold]{txt_path}[/], [bold]{md_path}[/]") + # Optional: generate a detailed malicious activity report using LLM + if ai_malicious_report and use_llm and suspicious_rows: + try: + + # Select IPs with strongest malicious indicators + def is_malicious(row: dict[str, object]) -> bool: + risk = str(row.get("risk", "unknown")).lower() + talos = str(row.get("talos_reputation", "")).lower() + vt_mal = int(row.get("vt_malicious") or 0) + vt_susp = int(row.get("vt_suspicious") or 0) + return ( + risk in {"high"} + or talos in {"untrusted", "malicious"} + or vt_mal >= 1 + or vt_susp >= 3 + ) + + malicious = [r for r in suspicious_rows if is_malicious(r)] + if malicious: + # Derive minimal per-IP context from enriched events (top paths/UA) + from collections import Counter as _C + per_ip_paths: dict[str, list[tuple[str, int]]] = {} + per_ip_ua: dict[str, str] = {} + for ip in {str(r.get("ip")) for r in malicious}: + paths = _C([str(e.get("path")) for e in enriched if str(e.get("ip")) == ip and e.get("path")]) + per_ip_paths[ip] = 
paths.most_common(5) + # pick any UA string observed + for e in enriched: + if str(e.get("ip")) == ip and (e.get("ua") or e.get("user_agent")): + per_ip_ua[ip] = str(e.get("ua") or e.get("user_agent")) + break + # Build prompt + insight_req = { + "malicious": malicious[:20], # cap to keep prompt small + "per_ip_top_paths": per_ip_paths, + "per_ip_ua": per_ip_ua, + } + client = GroqRotatingClient() + content = client.chat([ + { + "role": "system", + "content": ( + "You are a senior SOC analyst. Draft a concise but detailed incident note summarizing malicious " + "activity detected in logs corroborated by CTI (AbuseIPDB, Talos, VirusTotal). " + "Include: IP(s), CTI signals, notable paths, suspected TTPs, and recommended actions (blocking, WAF rules, triage). " + "Use clear sections and bullets." + ), + }, + {"role": "user", "content": json.dumps(insight_req)}, + ]) + rpt_txt, rpt_md = build_malicious_ai_report(reports_dir, content) + console.log(f"Malicious AI report saved: [bold]{rpt_txt}[/], [bold]{rpt_md}[/]") + else: + console.log("[dim]No strong malicious CTI signals; skipping detailed AI report.") + except Exception as e: # pragma: no cover - network/env specific + console.log(f"[dim]Malicious AI report unavailable: {e}") + return out_path @@ -89,6 +174,11 @@ def summarize_and_cti( enriched_records: list[dict[str, object]], use_llm: bool, with_cti: bool = True, + *, + cti_scope: str = "suspicious", # 'suspicious' | 'all' + cti_max: int | None = None, + cti_batch_size: int | None = None, + cti_batch_pause: float = 0.0, ) -> tuple[dict[str, object], list[dict[str, object]], str | None]: """Compute overall stats, annotate suspicious IPs with CTI + UA, and optional AI note. 
@@ -108,6 +198,7 @@ def summarize_and_cti( } # Per-IP stats + settings = get_settings() per_ip = defaultdict(lambda: {"requests": 0, "errors_4xx": 0, "ua_suspicious": False}) for r in enriched_records: ip = str(r.get("ip") or "") @@ -120,14 +211,38 @@ def summarize_and_cti( status = 0 if 400 <= status < 500: per_ip[ip]["errors_4xx"] += 1 - ua_susp, _ = detect_suspicious_user_agent(str(r.get("ua") or r.get("user_agent") or "")) + ua_susp, _ = detect_suspicious_user_agent( + str(r.get("ua") or r.get("user_agent") or ""), + patterns=settings.suspicious_ua_patterns or None, + ) per_ip[ip]["ua_suspicious"] = per_ip[ip]["ua_suspicious"] or ua_susp # CTI lookup cti_map: dict[str, dict[str, object]] = {} if with_cti: try: - cti_results = cti_for_ips(per_ip.keys()) + # Decide candidate IPs to look up: prefer suspicious or top 4xx + if cti_scope == "all": + candidates = list(per_ip.keys()) + else: + candidates = [ + ip + for ip, stats in per_ip.items() + if (stats["errors_4xx"] >= settings.risk_4xx_threshold) or stats["ua_suspicious"] + ] + # Sort by 4xx desc then requests desc + candidates.sort(key=lambda i: (per_ip[i]["errors_4xx"], per_ip[i]["requests"]), reverse=True) + if cti_max is not None and cti_max >= 0: + candidates = candidates[:cti_max] + cti_results = cti_for_ips( + candidates, + virustotal_api_key=settings.virustotal_api_key, + otx_api_key=settings.otx_api_key, + greynoise_api_key=settings.greynoise_api_key, + ipinfo_token=settings.ipinfo_token, + batch_size=cti_batch_size, + pause_seconds=cti_batch_pause, + ) cti_map = {ip: v.to_dict() for ip, v in cti_results.items()} except Exception as e: # pragma: no cover - network / env specific console.log(f"[dim]CTI lookup failed: {e}. 
Continuing without CTI.") @@ -135,10 +250,27 @@ def summarize_and_cti( # Build suspicious rows suspicious_rows: list[dict[str, object]] = [] + # Load offline blocklist if provided + offline_blocked: set[str] = set() + if settings.offline_ip_blocklist: + try: + from pathlib import Path as _P + p = _P(settings.offline_ip_blocklist) + if p.exists(): + offline_blocked = {line.strip() for line in p.read_text(encoding="utf-8", errors="ignore").splitlines() if line.strip() and not line.strip().startswith('#')} + except Exception: + offline_blocked = set() for ip, stats in per_ip.items(): cti = cti_map.get(ip, {}) risk = str(cti.get("risk", "unknown")) - is_susp = risk in {"high", "medium"} or stats["errors_4xx"] >= 5 or stats["ua_suspicious"] + # Offline blocklist escalation + if ip in offline_blocked and risk != "high": + risk = "high" + is_susp = ( + risk in {"high", "medium"} + or stats["errors_4xx"] >= settings.risk_4xx_threshold + or stats["ua_suspicious"] + ) if not is_susp: continue row = { @@ -148,6 +280,14 @@ def summarize_and_cti( "total_reports": cti.get("total_reports"), "country": cti.get("country"), "url": cti.get("url"), + "talos_reputation": cti.get("talos_reputation"), + "talos_owner": cti.get("talos_owner"), + "vt_malicious": cti.get("vt_malicious"), + "vt_suspicious": cti.get("vt_suspicious"), + "otx_pulse_count": cti.get("otx_pulse_count"), + "greynoise_classification": cti.get("greynoise_classification"), + "threatfox_matches": cti.get("threatfox_matches"), + "ipinfo_org": cti.get("ipinfo_org"), **stats, } # One-line AI note from existing enrichment (if any) @@ -164,8 +304,6 @@ def summarize_and_cti( ai_insight: str | None = None if use_llm: try: - from .groq_client import GroqRotatingClient - client = GroqRotatingClient() insight_req = { "total_requests": total_requests, @@ -194,8 +332,6 @@ def process_pdf(path: Path, out_dir: Path, use_llm: bool) -> Path: out_path.write_text(text, encoding="utf-8") # Optional: one-shot summary with LLM if use_llm 
and text.strip(): - from .groq_client import GroqRotatingClient - client = GroqRotatingClient() summary = client.chat([ {"role": "system", "content": "Summarize the key findings in 5 bullets."}, @@ -277,7 +413,24 @@ def main(argv: List[str] | None = None) -> int: parser.add_argument("--format", choices=["jsonl", "csv"], default="jsonl", help="Output format for logs") parser.add_argument("--no-cti", action="store_true", help="Disable CTI lookups") parser.add_argument("--no-reports", action="store_true", help="Do not build text/markdown reports") + parser.add_argument("--ai-malicious-report", action="store_true", help="Generate detailed AI report for malicious CTI signals") parser.add_argument("--color", choices=["auto", "always", "never"], default="auto", help="Terminal color policy") + # LLM request controls + parser.add_argument("--llm-sample", type=int, default=200, help="Limit LLM calls by sampling this many groups (0=all)") + parser.add_argument( + "--llm-group-by", + choices=["none", "ip", "signature"], + default="ip", + help="Group records before enrichment to reduce LLM calls: 'ip' (minimal), 'signature' (ip+path+status+ua), or 'none'", + ) + parser.add_argument("--group-window", type=int, default=0, help="Optional time window (seconds) to include in grouping key") + parser.add_argument("--llm-gate-4xx", type=int, default=0, help="Only send groups with at least this many 4xx to the LLM (0=disabled)") + parser.add_argument("--llm-gate-ua", action="store_true", help="Only send groups with suspicious UA patterns to the LLM") + # CTI request controls + parser.add_argument("--cti-scope", choices=["suspicious", "all"], default="suspicious", help="Which IPs to look up for CTI") + parser.add_argument("--cti-max", type=int, default=100, help="Max CTI lookups (0=unlimited)") + parser.add_argument("--cti-batch-size", type=int, default=0, help="Batch size for CTI lookups (0=disabled)") + parser.add_argument("--cti-batch-pause", type=float, default=0.0, help="Pause 
seconds between CTI batches") args = parser.parse_args(argv) # Configure console color policy @@ -300,7 +453,32 @@ def main(argv: List[str] | None = None) -> int: out_path: Path enriched_records: List[Dict[str, object]] | None = None - if suffix in {".log", ".txt"} and path.name.startswith("access_log"): + # Heuristic: treat .log as logs; for .txt, auto-detect by trying to parse a few lines + def _looks_like_log_file(p: Path, sample_lines: int = 200) -> bool: + try: + text = p.read_text(encoding="utf-8", errors="ignore") + except Exception: + return False + lines = text.splitlines()[:sample_lines] + parsed = 0 + for ln in lines: + if parse_line(ln): + parsed += 1 + # One hit is enough to call it a log + break + return parsed > 0 + + if suffix == ".log" or (suffix == ".txt" and _looks_like_log_file(path)): + # Compute grouping config for LLM + gb = None + if args.llm_group_by == "ip": + gb = ["ip"] + elif args.llm_group_by == "signature": + gb = ["ip", "path", "status", "ua"] + # Normalize sample value + sample = None if args.llm_sample in (None, 0) else max(0, int(args.llm_sample)) + group_window = None if args.group_window in (None, 0) else max(1, int(args.group_window)) + gate4xx = None if args.llm_gate_4xx in (None, 0) else max(1, int(args.llm_gate_4xx)) out_path = process_log( path, out_dir, @@ -309,19 +487,19 @@ def main(argv: List[str] | None = None) -> int: out_format=args.format, with_cti=not args.no_cti, build_reports=not args.no_reports, + llm_sample=sample, + llm_group_by=gb, + group_window_sec=group_window, + llm_gate_min_4xx=gate4xx, + llm_gate_ua=bool(args.llm_gate_ua), + cti_scope=args.cti_scope, + cti_max=(None if args.cti_max in (None, 0) else max(0, int(args.cti_max))), + cti_batch_size=(None if getattr(args, 'cti_batch_size', 0) in (None, 0) else max(1, int(args.cti_batch_size))), + cti_batch_pause=float(getattr(args, 'cti_batch_pause', 0.0) or 0.0), + ai_malicious_report=bool(args.ai_malicious_report), ) # Load enriched to drive summary/preview 
enriched_records = [json.loads(l) for l in (out_dir / f"{path.stem}.jsonl").read_text(encoding="utf-8").splitlines()] if args.format == "jsonl" else None - elif suffix == ".log": - out_path = process_log( - path, - out_dir, - use_llm=use_llm, - limit=args.limit, - out_format=args.format, - with_cti=not args.no_cti, - build_reports=not args.no_reports, - ) elif suffix == ".pdf": out_path = process_pdf(path, out_dir, use_llm=use_llm) elif suffix == ".txt": diff --git a/src/config.py b/src/config.py index 87280c8..751113a 100644 --- a/src/config.py +++ b/src/config.py @@ -3,7 +3,7 @@ import os import random from dataclasses import dataclass -from typing import List +from typing import List, Optional from dotenv import load_dotenv @@ -12,6 +12,13 @@ class Settings: groq_api_keys: List[str] groq_model: str + risk_4xx_threshold: int + suspicious_ua_patterns: List[str] + virustotal_api_key: Optional[str] + otx_api_key: Optional[str] + greynoise_api_key: Optional[str] + ipinfo_token: Optional[str] + offline_ip_blocklist: Optional[str] def get_settings() -> Settings: @@ -21,5 +28,26 @@ def get_settings() -> Settings: # Shuffle order to distribute load if multiple keys are provided random.shuffle(keys) model = os.getenv("GROQ_MODEL", "llama3-8b-8192") - return Settings(groq_api_keys=keys, groq_model=model) - + # Risk/UA configuration + try: + risk_4xx_threshold = int(os.getenv("RISK_4XX_THRESHOLD", "5")) + except ValueError: + risk_4xx_threshold = 5 + ua_raw = os.getenv("SUSPICIOUS_UA_REGEX", "").strip() + ua_patterns: List[str] = [p.strip() for p in ua_raw.split(",") if p.strip()] + vt_key = os.getenv("VT_API_KEY") or None + otx_key = os.getenv("OTX_API_KEY") or None + gn_key = os.getenv("GREYNOISE_API_KEY") or None + ipinfo = os.getenv("IPINFO_TOKEN") or None + offline_blocklist = os.getenv("OFFLINE_IP_BLOCKLIST") or None + return Settings( + groq_api_keys=keys, + groq_model=model, + risk_4xx_threshold=risk_4xx_threshold, + suspicious_ua_patterns=ua_patterns, + 
virustotal_api_key=vt_key, + otx_api_key=otx_key, + greynoise_api_key=gn_key, + ipinfo_token=ipinfo, + offline_ip_blocklist=offline_blocklist, + ) diff --git a/src/enrichers/cti_providers.py b/src/enrichers/cti_providers.py index 6f76f7e..87b0133 100644 --- a/src/enrichers/cti_providers.py +++ b/src/enrichers/cti_providers.py @@ -14,6 +14,22 @@ class AbuseIPDBResult: url: str +@dataclass +class TalosResult: + ip: str + reputation: Optional[str] + owner: Optional[str] + url: str + + +@dataclass +class VirusTotalResult: + ip: str + malicious: Optional[int] + suspicious: Optional[int] + url: str + + def fetch_abuseipdb(ip: str, timeout: float = 15.0) -> AbuseIPDBResult: # Lazy imports to keep tests independent of optional deps try: @@ -78,3 +94,59 @@ def _extract_text(patterns): country=country, url=url, ) + + +def fetch_talos(ip: str, timeout: float = 15.0) -> TalosResult: + try: + import httpx # type: ignore + except Exception: # pragma: no cover + httpx = None # type: ignore + try: + from bs4 import BeautifulSoup # type: ignore + except Exception: # pragma: no cover + BeautifulSoup = None # type: ignore + + url = f"https://talosintelligence.com/reputation_center/lookup?search={ip}" + if httpx is None or BeautifulSoup is None: # pragma: no cover + return TalosResult(ip=ip, reputation=None, owner=None, url=url) + try: + with httpx.Client(follow_redirects=True, timeout=timeout) as client: + resp = client.get(url) + resp.raise_for_status() + html = resp.text + except Exception: # pragma: no cover + return TalosResult(ip=ip, reputation=None, owner=None, url=url) + soup = BeautifulSoup(html, "html.parser") + text = soup.get_text(" ", strip=True) + rep = None + owner = None + # Heuristic patterns + m = re.search(r"Web Reputation\s*:?\s*([A-Za-z]+)", text, re.IGNORECASE) + if m: + rep = m.group(1).strip() + m = re.search(r"Owner\s*:?\s*([\w\s\-\.,]+)", text, re.IGNORECASE) + if m: + owner = m.group(1).strip() + return TalosResult(ip=ip, reputation=rep, owner=owner, 
url=url) + + +def fetch_virustotal(ip: str, api_key: Optional[str], timeout: float = 15.0) -> VirusTotalResult: + url = f"https://www.virustotal.com/api/v3/ip_addresses/{ip}" + if not api_key: # pragma: no cover + return VirusTotalResult(ip=ip, malicious=None, suspicious=None, url=url) + try: + import httpx # type: ignore + except Exception: # pragma: no cover + return VirusTotalResult(ip=ip, malicious=None, suspicious=None, url=url) + try: + with httpx.Client(timeout=timeout, headers={"x-apikey": api_key}) as client: + r = client.get(url) + if r.status_code >= 400: + return VirusTotalResult(ip=ip, malicious=None, suspicious=None, url=url) + data = r.json() + stats = data.get("data", {}).get("attributes", {}).get("last_analysis_stats", {}) + mal = stats.get("malicious") + susp = stats.get("suspicious") + return VirusTotalResult(ip=ip, malicious=mal, suspicious=susp, url=url) + except Exception: # pragma: no cover + return VirusTotalResult(ip=ip, malicious=None, suspicious=None, url=url) diff --git a/src/enrichers/cti_providers_ext.py b/src/enrichers/cti_providers_ext.py new file mode 100644 index 0000000..e345b20 --- /dev/null +++ b/src/enrichers/cti_providers_ext.py @@ -0,0 +1,131 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Optional, List, Dict + + +@dataclass +class OTXResult: + ip: str + pulse_count: Optional[int] + reputation: Optional[int] + url: str + + +@dataclass +class GreyNoiseResult: + ip: str + classification: Optional[str] # benign|malicious|unknown + name: Optional[str] + url: str + + +@dataclass +class ThreatFoxResult: + ip: str + matches: Optional[int] + url: str + + +@dataclass +class IPInfoResult: + ip: str + org: Optional[str] + country: Optional[str] + city: Optional[str] + url: str + + +def fetch_otx(ip: str, api_key: Optional[str], timeout: float = 15.0) -> OTXResult: + url = f"https://otx.alienvault.com/api/v1/indicators/IPv4/{ip}/general" + if not api_key: + return OTXResult(ip=ip, 
pulse_count=None, reputation=None, url=url) + try: + import httpx # type: ignore + except Exception: # pragma: no cover + return OTXResult(ip=ip, pulse_count=None, reputation=None, url=url) + try: + with httpx.Client(timeout=timeout, headers={"X-OTX-API-KEY": api_key}) as client: + r = client.get(url) + if r.status_code >= 400: + return OTXResult(ip=ip, pulse_count=None, reputation=None, url=url) + data = r.json() + pulse_info = data.get("pulse_info", {}) + count = int(pulse_info.get("count") or 0) + rep = data.get("reputation") + try: + rep = int(rep) if rep is not None else None + except Exception: + rep = None + return OTXResult(ip=ip, pulse_count=count, reputation=rep, url=url) + except Exception: # pragma: no cover + return OTXResult(ip=ip, pulse_count=None, reputation=None, url=url) + + +def fetch_greynoise(ip: str, api_key: Optional[str], timeout: float = 15.0) -> GreyNoiseResult: + url = f"https://api.greynoise.io/v3/community/{ip}" + if not api_key: + return GreyNoiseResult(ip=ip, classification=None, name=None, url=url) + try: + import httpx # type: ignore + except Exception: # pragma: no cover + return GreyNoiseResult(ip=ip, classification=None, name=None, url=url) + try: + with httpx.Client(timeout=timeout, headers={"key": api_key}) as client: + r = client.get(url) + if r.status_code >= 400: + return GreyNoiseResult(ip=ip, classification=None, name=None, url=url) + data = r.json() + return GreyNoiseResult( + ip=ip, + classification=data.get("classification"), + name=data.get("name"), + url=url, + ) + except Exception: # pragma: no cover + return GreyNoiseResult(ip=ip, classification=None, name=None, url=url) + + +def fetch_threatfox(ip: str, timeout: float = 15.0) -> ThreatFoxResult: + url = "https://threatfox-api.abuse.ch/api/v1/" + try: + import httpx # type: ignore + except Exception: # pragma: no cover + return ThreatFoxResult(ip=ip, matches=None, url=url) + try: + with httpx.Client(timeout=timeout) as client: + r = client.post(url, json={"query": 
"search_ioc", "search_term": ip}) + if r.status_code >= 400: + return ThreatFoxResult(ip=ip, matches=None, url=url) + data = r.json() + # Response has { query_status, data: [ ... ] } + arr: List[Dict[str, object]] = data.get("data") or [] + return ThreatFoxResult(ip=ip, matches=len(arr) if isinstance(arr, list) else 0, url=url) + except Exception: # pragma: no cover + return ThreatFoxResult(ip=ip, matches=None, url=url) + + +def fetch_ipinfo(ip: str, token: Optional[str], timeout: float = 10.0) -> IPInfoResult: + url = f"https://ipinfo.io/{ip}/json" + # IPInfo allows limited anonymous queries; token improves reliability + try: + import httpx # type: ignore + except Exception: # pragma: no cover + return IPInfoResult(ip=ip, org=None, country=None, city=None, url=url) + try: + headers = {"Authorization": f"Bearer {token}"} if token else {} + with httpx.Client(timeout=timeout, headers=headers) as client: + r = client.get(url) + if r.status_code >= 400: + return IPInfoResult(ip=ip, org=None, country=None, city=None, url=url) + data = r.json() + return IPInfoResult( + ip=ip, + org=data.get("org"), + country=data.get("country"), + city=data.get("city"), + url=url, + ) + except Exception: # pragma: no cover + return IPInfoResult(ip=ip, org=None, country=None, city=None, url=url) + diff --git a/src/enrichers/cti_service.py b/src/enrichers/cti_service.py index 79bab2e..8b4ee5d 100644 --- a/src/enrichers/cti_service.py +++ b/src/enrichers/cti_service.py @@ -5,7 +5,24 @@ from pathlib import Path import json -from .cti_providers import fetch_abuseipdb, AbuseIPDBResult +from .cti_providers import ( + fetch_abuseipdb, + AbuseIPDBResult, + fetch_talos, + TalosResult, + fetch_virustotal, + VirusTotalResult, +) +from .cti_providers_ext import ( + fetch_otx, + OTXResult, + fetch_greynoise, + GreyNoiseResult, + fetch_threatfox, + ThreatFoxResult, + fetch_ipinfo, + IPInfoResult, +) @dataclass @@ -17,6 +34,30 @@ class CTIRecord: country: Optional[str] = None url: Optional[str] = None 
risk: str = "unknown" # low/medium/high/unknown + # Talos + talos_reputation: Optional[str] = None + talos_owner: Optional[str] = None + talos_url: Optional[str] = None + # VirusTotal + vt_malicious: Optional[int] = None + vt_suspicious: Optional[int] = None + vt_url: Optional[str] = None + # OTX + otx_pulse_count: Optional[int] = None + otx_reputation: Optional[int] = None + otx_url: Optional[str] = None + # GreyNoise + greynoise_classification: Optional[str] = None + greynoise_name: Optional[str] = None + greynoise_url: Optional[str] = None + # ThreatFox + threatfox_matches: Optional[int] = None + threatfox_url: Optional[str] = None + # IPInfo (enrichment only) + ipinfo_org: Optional[str] = None + ipinfo_country: Optional[str] = None + ipinfo_city: Optional[str] = None + ipinfo_url: Optional[str] = None def to_dict(self) -> Dict[str, object]: return asdict(self) @@ -34,6 +75,38 @@ def _score_to_risk(score: Optional[int], reports: Optional[int]) -> str: return "low" +def _merge_risk(base: str, talos_rep: Optional[str], vt_mal: Optional[int], vt_susp: Optional[int]) -> str: + # Upgrade risk based on Talos/VirusTotal signals + r = base + rep = (talos_rep or "").lower() + if rep in {"untrusted", "malicious"}: + r = "high" + elif rep in {"questionable"} and r == "low": + r = "medium" + mal = vt_mal or 0 + susp = vt_susp or 0 + if mal >= 5: + r = "high" + elif mal >= 1 or susp >= 3: + if r == "low": + r = "medium" + return r + + +def _merge_risk_ext(current: str, + otx_pulses: Optional[int], + greynoise_cls: Optional[str], + threatfox_matches: Optional[int]) -> str: + r = current + if greynoise_cls and greynoise_cls.lower() == "malicious": + r = "high" + if (otx_pulses or 0) >= 3 and r == "low": + r = "medium" + if (threatfox_matches or 0) >= 1: + r = "high" + return r + + def _load_cache(path: Path) -> Dict[str, Dict[str, object]]: if path.exists(): try: @@ -50,56 +123,127 @@ def _save_cache(path: Path, data: Dict[str, Dict[str, object]]) -> None: def cti_for_ips( 
ips: Iterable[str], - provider: str = "abuseipdb", + providers: Iterable[str] = ("abuseipdb", "talos", "virustotal", "otx", "greynoise", "threatfox", "ipinfo"), cache_path: Path | None = Path("data/cache/cti_cache.json"), force_refresh: bool = False, + virustotal_api_key: Optional[str] = None, + otx_api_key: Optional[str] = None, + greynoise_api_key: Optional[str] = None, + ipinfo_token: Optional[str] = None, + *, + batch_size: int | None = None, + pause_seconds: float = 0.0, + cache_flush_every: int = 10, ) -> Dict[str, CTIRecord]: results: Dict[str, CTIRecord] = {} unique_ips = list(dict.fromkeys(i for i in ips if i)) cache: Dict[str, Dict[str, object]] = {} if cache_path: cache = _load_cache(cache_path) - if provider == "abuseipdb": - for ip in unique_ips: - if not force_refresh and ip in cache: - cached = cache[ip] - rec = CTIRecord( - ip=ip, - source="abuseipdb", - abuse_confidence_score=cached.get("abuse_confidence_score"), - total_reports=cached.get("total_reports"), - country=cached.get("country"), - url=cached.get("url"), - ) - else: - r: AbuseIPDBResult = fetch_abuseipdb(ip) - rec = CTIRecord( - ip=ip, - source="abuseipdb", - abuse_confidence_score=r.abuse_confidence_score, - total_reports=r.total_reports, - country=r.country, - url=r.url, - ) - if cache_path: - cache[ip] = { - "abuse_confidence_score": rec.abuse_confidence_score, - "total_reports": rec.total_reports, - "country": rec.country, - "url": rec.url, - } - rec = CTIRecord( - ip=rec.ip, - source=rec.source, - abuse_confidence_score=rec.abuse_confidence_score, - total_reports=rec.total_reports, - country=rec.country, - url=rec.url, - ) - rec.risk = _score_to_risk(rec.abuse_confidence_score, rec.total_reports) - results[ip] = rec + processed = 0 + for ip in unique_ips: + cached = cache.get(ip, {}) if cache_path else {} + # Start from cached/base + rec = CTIRecord( + ip=ip, + source="multi", + abuse_confidence_score=cached.get("abuse_confidence_score"), + total_reports=cached.get("total_reports"), 
+ country=cached.get("country"), + url=cached.get("url"), + talos_reputation=cached.get("talos_reputation"), + talos_owner=cached.get("talos_owner"), + talos_url=cached.get("talos_url"), + vt_malicious=cached.get("vt_malicious"), + vt_suspicious=cached.get("vt_suspicious"), + vt_url=cached.get("vt_url"), + otx_pulse_count=cached.get("otx_pulse_count"), + otx_reputation=cached.get("otx_reputation"), + otx_url=cached.get("otx_url"), + greynoise_classification=cached.get("greynoise_classification"), + greynoise_name=cached.get("greynoise_name"), + greynoise_url=cached.get("greynoise_url"), + threatfox_matches=cached.get("threatfox_matches"), + threatfox_url=cached.get("threatfox_url"), + ipinfo_org=cached.get("ipinfo_org"), + ipinfo_country=cached.get("ipinfo_country"), + ipinfo_city=cached.get("ipinfo_city"), + ipinfo_url=cached.get("ipinfo_url"), + ) + # Fetch live if force or missing + if force_refresh or rec.abuse_confidence_score is None and ("abuseipdb" in providers): + a: AbuseIPDBResult = fetch_abuseipdb(ip) + rec.abuse_confidence_score = a.abuse_confidence_score + rec.total_reports = a.total_reports + rec.country = a.country + rec.url = a.url + if force_refresh or rec.talos_reputation is None and ("talos" in providers): + t: TalosResult = fetch_talos(ip) + rec.talos_reputation = t.reputation + rec.talos_owner = t.owner + rec.talos_url = t.url + if force_refresh or rec.vt_malicious is None and ("virustotal" in providers): + v: VirusTotalResult = fetch_virustotal(ip, virustotal_api_key) + rec.vt_malicious = v.malicious + rec.vt_suspicious = v.suspicious + rec.vt_url = v.url + if force_refresh or rec.otx_pulse_count is None and ("otx" in providers): + o: OTXResult = fetch_otx(ip, otx_api_key) + rec.otx_pulse_count = o.pulse_count + rec.otx_reputation = o.reputation + rec.otx_url = o.url + if force_refresh or rec.greynoise_classification is None and ("greynoise" in providers): + g: GreyNoiseResult = fetch_greynoise(ip, greynoise_api_key) + 
rec.greynoise_classification = g.classification + rec.greynoise_name = g.name + rec.greynoise_url = g.url + if force_refresh or rec.threatfox_matches is None and ("threatfox" in providers): + tf: ThreatFoxResult = fetch_threatfox(ip) + rec.threatfox_matches = tf.matches + rec.threatfox_url = tf.url + if force_refresh or rec.ipinfo_org is None and ("ipinfo" in providers): + ii: IPInfoResult = fetch_ipinfo(ip, ipinfo_token) + rec.ipinfo_org = ii.org + rec.ipinfo_country = ii.country + rec.ipinfo_city = ii.city + rec.ipinfo_url = ii.url + # Compute risk + base = _score_to_risk(rec.abuse_confidence_score, rec.total_reports) + rec.risk = _merge_risk(base, rec.talos_reputation, rec.vt_malicious, rec.vt_suspicious) + rec.risk = _merge_risk_ext(rec.risk, rec.otx_pulse_count, rec.greynoise_classification, rec.threatfox_matches) + results[ip] = rec if cache_path: + cache[ip] = { + "abuse_confidence_score": rec.abuse_confidence_score, + "total_reports": rec.total_reports, + "country": rec.country, + "url": rec.url, + "talos_reputation": rec.talos_reputation, + "talos_owner": rec.talos_owner, + "talos_url": rec.talos_url, + "vt_malicious": rec.vt_malicious, + "vt_suspicious": rec.vt_suspicious, + "vt_url": rec.vt_url, + "otx_pulse_count": rec.otx_pulse_count, + "otx_reputation": rec.otx_reputation, + "otx_url": rec.otx_url, + "greynoise_classification": rec.greynoise_classification, + "greynoise_name": rec.greynoise_name, + "greynoise_url": rec.greynoise_url, + "threatfox_matches": rec.threatfox_matches, + "threatfox_url": rec.threatfox_url, + "ipinfo_org": rec.ipinfo_org, + "ipinfo_country": rec.ipinfo_country, + "ipinfo_city": rec.ipinfo_city, + "ipinfo_url": rec.ipinfo_url, + } + processed += 1 + # Optional pause and periodic cache flush for resiliency on large batches + if cache_path and processed % max(1, cache_flush_every) == 0: _save_cache(cache_path, cache) - else: - raise ValueError(f"Unsupported CTI provider: {provider}") + if batch_size and (processed % batch_size 
== 0) and pause_seconds > 0: + import time as _t + _t.sleep(pause_seconds) + if cache_path: + _save_cache(cache_path, cache) return results diff --git a/src/enrichers/llm_enricher.py b/src/enrichers/llm_enricher.py index 82bb24c..3298ed3 100644 --- a/src/enrichers/llm_enricher.py +++ b/src/enrichers/llm_enricher.py @@ -1,7 +1,10 @@ from __future__ import annotations import json -from typing import Dict, List +from typing import Dict, List, Iterable, Tuple, Optional +from datetime import datetime +from ..parsers.ua_analysis import detect_suspicious_user_agent +from ..config import get_settings from ..groq_client import GroqRotatingClient @@ -13,26 +16,126 @@ ) -def enrich_log_records(records: List[Dict[str, object]], use_llm: bool = True) -> List[Dict[str, object]]: +def enrich_log_records( + records: List[Dict[str, object]], + use_llm: bool = True, + *, + llm_sample: Optional[int] = None, + group_by: Optional[List[str]] = None, + group_window_sec: Optional[int] = None, + llm_gate_min_4xx: Optional[int] = None, + llm_gate_ua: bool = False, +) -> List[Dict[str, object]]: + """Enrich records using LLM with optional grouping and sampling to reduce requests. + + - When ``use_llm`` is False, returns pass-through annotations. + - If ``group_by`` is provided, records are grouped by the tuple of those fields and a single + representative from each group is sent to the LLM; the response is propagated to the group. + - If ``llm_sample`` is provided, only that many groups are sent to the LLM and the rest are + annotated with defaults (severity=unknown, rationale="LLM sampled out"). 
+ """ if not use_llm: - # Pass-through with default annotations return [ {**r, "severity": "unknown", "iocs": [r.get("ip")] if r.get("ip") else [], "rationale": "LLM disabled"} for r in records ] + # Helper to build group key + def _parse_time_bucket(rec: Dict[str, object]) -> Optional[int]: + if not group_window_sec: + return None + raw = str(rec.get("time") or rec.get("timestamp") or "").strip() + if not raw: + return None + # Try common formats + for fmt in ("%d/%b/%Y:%H:%M:%S %z", "%Y-%m-%dT%H:%M:%S%z", "%Y-%m-%dT%H:%M:%S.%f%z"): + try: + dt = datetime.strptime(raw, fmt) + return int(dt.timestamp()) // int(group_window_sec) + except Exception: + continue + return None + + def _key(rec: Dict[str, object]) -> Tuple[object, ...]: + if not group_by: + return (id(rec),) # unique per record so it behaves like "no grouping" + bucket = _parse_time_bucket(rec) + base = tuple(rec.get(k) for k in group_by) + return base + ((bucket,) if bucket is not None else tuple()) + + # Build groups: key -> list of indices + groups: Dict[Tuple[object, ...], List[int]] = {} + per_group_stats: Dict[Tuple[object, ...], Dict[str, int | bool]] = {} + for idx, rec in enumerate(records): + k = _key(rec) + groups.setdefault(k, []).append(idx) + # accumulate stats + st = per_group_stats.setdefault(k, {"count": 0, "errors_4xx": 0, "ua_susp": False}) + st["count"] = int(st["count"]) + 1 + try: + status = int(rec.get("status", 0)) + except Exception: + status = 0 + if 400 <= status < 500: + st["errors_4xx"] = int(st["errors_4xx"]) + 1 + ua_str = str(rec.get("ua") or rec.get("user_agent") or "") + if ua_str: + settings = get_settings() + susp, _ = detect_suspicious_user_agent(ua_str, patterns=settings.suspicious_ua_patterns or None) + st["ua_susp"] = bool(st["ua_susp"]) or susp + + # Select which groups to actually send to LLM + group_keys: List[Tuple[object, ...]] = list(groups.keys()) + # Prefer larger groups first to maximize coverage + group_keys.sort(key=lambda k: len(groups[k]), 
reverse=True) + # Apply gating if requested + if llm_gate_min_4xx is not None or llm_gate_ua: + gated: List[Tuple[object, ...]] = [] + for k in group_keys: + stats = per_group_stats.get(k, {}) + ok = True + if llm_gate_min_4xx is not None: + ok = ok and int(stats.get("errors_4xx", 0)) >= int(llm_gate_min_4xx) + if llm_gate_ua: + ok = ok and bool(stats.get("ua_susp", False)) + if ok: + gated.append(k) + group_keys = gated + if llm_sample is not None and llm_sample >= 0: + group_keys = group_keys[:llm_sample] + client = GroqRotatingClient() - enriched: List[Dict[str, object]] = [] - for r in records: + # Map group key -> parsed enrichment + parsed_by_group: Dict[Tuple[object, ...], Dict[str, object]] = {} + + # Enrich selected groups + for k in group_keys: + rep_index = groups[k][0] + r = records[rep_index] user = f"Log: {json.dumps(r, ensure_ascii=False)}" - content = client.chat([ - {"role": "system", "content": SYSTEM_PROMPT}, - {"role": "user", "content": user}, - ]) + try: + content = client.chat([ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": user}, + ]) + except Exception as e: # budget/rate/network + parsed_by_group[k] = {"severity": "unknown", "iocs": [], "rationale": f"LLM unavailable: {str(e)[:120]}"} + continue try: parsed = json.loads(content) except Exception: parsed = {"severity": "unknown", "iocs": [], "rationale": content[:200]} + parsed_by_group[k] = parsed + + enriched: List[Dict[str, object]] = [] + for idx, r in enumerate(records): + k = _key(r) + parsed = parsed_by_group.get(k) + if parsed is None: + reason = "LLM sampled out" + # If gating was applied, clarify + if llm_gate_min_4xx is not None or llm_gate_ua: + reason = "LLM gated out" + parsed = {"severity": "unknown", "iocs": [r.get("ip")] if r.get("ip") else [], "rationale": reason} enriched.append({**r, **parsed}) return enriched - diff --git a/src/groq_client.py b/src/groq_client.py index 58ee1c0..a758c15 100644 --- a/src/groq_client.py +++ 
b/src/groq_client.py @@ -3,6 +3,7 @@ import random import time from typing import Dict, List, Optional +import os from groq import Groq from rich.console import Console @@ -20,6 +21,12 @@ def __init__(self, api_keys: Optional[List[str]] = None, model: Optional[str] = self.model = model or settings.groq_model self._clients = [Groq(api_key=k) for k in self.keys] self._index = 0 + # Simple token budget guard (approximate tokens via chars/4) + try: + self._budget = int(os.getenv("GROQ_TOKENS_BUDGET", "0")) or None + except ValueError: + self._budget = None + self._used = 0 def _next_client(self) -> Groq: if not self._clients: @@ -39,6 +46,11 @@ def chat(self, messages: List[Dict[str, str]], """ last_error: Optional[Exception] = None m = model or self.model + # pre-check budget + if self._budget is not None: + approx_tokens = sum(len(m.get("content", "")) for m in messages) // 4 + 32 + if self._used + approx_tokens > self._budget: + raise RuntimeError("LLM budget exhausted; set GROQ_TOKENS_BUDGET higher or reduce --llm-sample") for attempt in range(max_retries): client = self._next_client() try: @@ -47,6 +59,10 @@ def chat(self, messages: List[Dict[str, str]], messages=messages, temperature=0.2, ) + # book tokens used (approx) + if self._budget is not None: + approx_tokens = sum(len(m.get("content", "")) for m in messages) // 4 + 32 + self._used += approx_tokens return resp.choices[0].message.content or "" except Exception as e: # pragma: no cover - network specific last_error = e @@ -59,4 +75,3 @@ def chat(self, messages: List[Dict[str, str]], continue raise raise RuntimeError(f"Groq chat failed after {max_retries} attempts: {last_error}") - diff --git a/src/parsers/ua_analysis.py b/src/parsers/ua_analysis.py index 74c00ae..90e6315 100644 --- a/src/parsers/ua_analysis.py +++ b/src/parsers/ua_analysis.py @@ -1,7 +1,7 @@ from __future__ import annotations import re -from typing import List, Tuple +from typing import List, Tuple, Optional SUSPICIOUS_AGENTS = [ @@ -19,12 
+19,12 @@ ] -def detect_suspicious_user_agent(ua: str | None) -> Tuple[bool, str | None]: +def detect_suspicious_user_agent(ua: Optional[str], patterns: Optional[List[str]] = None) -> Tuple[bool, Optional[str]]: if not ua: return False, None ua_l = ua.lower() - for pat in SUSPICIOUS_AGENTS: + pats = patterns if patterns else SUSPICIOUS_AGENTS + for pat in pats: if re.search(pat, ua_l): return True, pat return False, None - diff --git a/src/reports/report_builder.py b/src/reports/report_builder.py index bc5f85d..06bb121 100644 --- a/src/reports/report_builder.py +++ b/src/reports/report_builder.py @@ -32,8 +32,8 @@ def build_markdown_report( if not suspicious: lines.append("No suspicious IPs identified.\n") else: - lines.append(_md_row(["IP", "Risk", "Abuse Score", "Total Reports", "Country", "Requests", "4xx", "Suspicious UA", "One-line Explain"])) - lines.append(_md_row(["---"] * 9)) + lines.append(_md_row(["IP", "Risk", "Abuse Score", "Total Reports", "Country", "Requests", "4xx", "Suspicious UA", "Talos", "VT (mal/susp)", "One-line Explain"])) + lines.append(_md_row(["---"] * 11)) for s in suspicious: lines.append( _md_row([ @@ -45,6 +45,8 @@ def build_markdown_report( str(s.get("requests", "")), str(s.get("errors_4xx", "")), "yes" if s.get("ua_suspicious") else "no", + str(s.get("talos_reputation", "")), + f"{s.get('vt_malicious','')}/{s.get('vt_suspicious','')}", str(s.get("ai_one_liner", "")), ]) ) @@ -80,7 +82,8 @@ def build_text_report( lines.append( f"- {s.get('ip')} | risk={s.get('risk')} | score={s.get('abuse_confidence_score')} | " f"reports={s.get('total_reports')} | country={s.get('country')} | req={s.get('requests')} | " - f"4xx={s.get('errors_4xx')} | UA suspicious={'yes' if s.get('ua_suspicious') else 'no'}\n" + f"4xx={s.get('errors_4xx')} | UA suspicious={'yes' if s.get('ua_suspicious') else 'no'} | " + f"talos={s.get('talos_reputation')} | vt={s.get('vt_malicious')}/{s.get('vt_suspicious')}\n" ) if s.get("ai_one_liner"): lines.append(f" AI: 
{s.get('ai_one_liner')}\n") @@ -88,3 +91,32 @@ def build_text_report( path.write_text("".join(lines), encoding="utf-8") return path + +def build_malicious_ai_report( + out_dir: Path, + content: str, + *, + title: str = "Malicious Activity AI Report", +) -> tuple[Path, Path]: + """Write a detailed AI-written malicious activity report to txt and md. + + Returns: (txt_path, md_path) + """ + out_dir.mkdir(parents=True, exist_ok=True) + txt_path = out_dir / "malicious_ai_report.txt" + md_path = out_dir / "malicious_ai_report.md" + + # Text version + lines_txt: List[str] = [] + lines_txt.append(f"{title}\n") + lines_txt.append("=" * len(title) + "\n\n") + lines_txt.append(content.strip() + "\n") + txt_path.write_text("".join(lines_txt), encoding="utf-8") + + # Markdown version + lines_md: List[str] = [] + lines_md.append(f"# {title}\n\n") + lines_md.append(content.strip() + "\n") + md_path.write_text("".join(lines_md), encoding="utf-8") + + return txt_path, md_path diff --git a/tests/cli/test_ai_malicious_report.py b/tests/cli/test_ai_malicious_report.py new file mode 100644 index 0000000..1f7e09a --- /dev/null +++ b/tests/cli/test_ai_malicious_report.py @@ -0,0 +1,41 @@ +import os +from pathlib import Path + +from src import cli + + +class DummyGroq: + def chat(self, messages): + return "DUMMY MALICIOUS REPORT" + + +def test_ai_malicious_report_offline_blocklist(tmp_path, monkeypatch): + # Prepare a log with one IP that will be escalated via offline blocklist + log = tmp_path / "access_log.txt" + log.write_text( + '\n'.join([ + '10.9.9.9 - - [10/Oct/2000:13:55:36 -0700] "GET /a HTTP/1.1" 404 0 "-" "sqlmap/1.7"', + '10.9.9.9 - - [10/Oct/2000:13:55:40 -0700] "GET /b HTTP/1.1" 404 0 "-" "sqlmap/1.7"', + ]), + encoding="utf-8", + ) + # Create offline blocklist and point env var to it so risk escalates to high + bl = tmp_path / "blocklist.txt" + bl.write_text("10.9.9.9\n", encoding="utf-8") + monkeypatch.setenv("OFFLINE_IP_BLOCKLIST", str(bl)) + # Ensure LLM path is taken; 
set a dummy key and monkeypatch client + monkeypatch.setenv("GROQ_API_KEYS", "dummy-key") + monkeypatch.setattr(cli, "GroqRotatingClient", lambda: DummyGroq()) + + rc = cli.main([ + str(log), + "--out", str(tmp_path), + "--no-cti", # avoid live CTI calls + "--ai-malicious-report", + "--color", "never", + ]) + assert rc == 0 + rpt = tmp_path / "reports" / "malicious_ai_report.txt" + assert rpt.exists() + assert "DUMMY MALICIOUS REPORT" in rpt.read_text(encoding="utf-8") + diff --git a/tests/cli/test_cli_main.py b/tests/cli/test_cli_main.py index 7cc7c56..f5f7bb5 100644 --- a/tests/cli/test_cli_main.py +++ b/tests/cli/test_cli_main.py @@ -28,3 +28,18 @@ def test_cli_main_unsupported(tmp_path: Path): p.write_bytes(b"\x00\x01") rc = cli.main([str(p), "--out", str(tmp_path), "--no-llm", "--no-cti", "--no-reports"]) assert rc == 2 + + +def test_cli_main_txt_log_autodetect(tmp_path: Path): + txt_log = tmp_path / "new_log.txt" + txt_log.write_text('127.0.0.1 - - [10/Oct/2000:13:55:36 -0700] "GET /a HTTP/1.1" 200 123 "-" "Mozilla/5.0"\n', encoding="utf-8") + rc = cli.main([ + str(txt_log), + "--out", str(tmp_path), + "--no-llm", "--no-cti", "--no-reports", + "--color", "never", + ]) + assert rc == 0 + # Confirm log output exists + out_jsonl = tmp_path / "new_log.jsonl" + assert out_jsonl.exists() diff --git a/tests/enrichers/test_cti_providers_parsing.py b/tests/enrichers/test_cti_providers_parsing.py new file mode 100644 index 0000000..027fafd --- /dev/null +++ b/tests/enrichers/test_cti_providers_parsing.py @@ -0,0 +1,98 @@ +import types + +import pytest + +from src.enrichers.cti_providers import ( + fetch_abuseipdb, + fetch_talos, + fetch_virustotal, +) + + +class _Resp: + def __init__(self, text: str = "", status: int = 200, json_data=None): + self.text = text + self.status_code = status + self._json = json_data or {} + + def raise_for_status(self): + if self.status_code >= 400: + raise RuntimeError("http error") + + def json(self): + return self._json + + +class 
_Client: + def __init__(self, *, text: str = "", status: int = 200, json_data=None, **_: object): + self._resp = _Resp(text=text, status=status, json_data=json_data) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def get(self, url: str): # noqa: ARG002 - exercised by provider code + return self._resp + + +def test_fetch_abuseipdb_parses_html(monkeypatch): + html = """ + +
Abuse Confidence Score: 90
+
Total Reports: 123
+
Country: United States
+ + """ + + # Patch httpx.Client to our stub + import httpx # type: ignore + + monkeypatch.setattr(httpx, "Client", lambda **kwargs: _Client(text=html)) + + res = fetch_abuseipdb("1.2.3.4") + assert res.ip == "1.2.3.4" + assert res.abuse_confidence_score == 90 + assert res.total_reports == 123 + assert res.country == "United States" + assert "abuseipdb" in res.url + + +def test_fetch_talos_parses_html(monkeypatch): + html = """ + +
Web Reputation: Malicious
+
Owner: Example ISP, Inc.
+ + """ + import httpx # type: ignore + + monkeypatch.setattr(httpx, "Client", lambda **kwargs: _Client(text=html)) + + res = fetch_talos("5.6.7.8") + assert res.ip == "5.6.7.8" + assert res.reputation == "Malicious" + assert res.owner == "Example ISP, Inc." + assert "talos" in res.url + + +def test_fetch_virustotal_parses_json(monkeypatch): + payload = { + "data": { + "attributes": { + "last_analysis_stats": {"malicious": 2, "suspicious": 3} + } + } + } + + import httpx # type: ignore + + monkeypatch.setattr(httpx, "Client", lambda **kwargs: _Client(json_data=payload)) + + res = fetch_virustotal("9.9.9.9", api_key="dummy") + assert res.ip == "9.9.9.9" + assert res.malicious == 2 + assert res.suspicious == 3 + assert "virustotal" in res.url + diff --git a/tests/groq_client/test_groq_rotating_client.py b/tests/groq_client/test_groq_rotating_client.py new file mode 100644 index 0000000..70a5819 --- /dev/null +++ b/tests/groq_client/test_groq_rotating_client.py @@ -0,0 +1,49 @@ +from types import SimpleNamespace + +import pytest + +import src.groq_client as gc + + +class FakeCompletions: + def __init__(self, content: str): + self._content = content + + def create(self, model, messages, temperature): # noqa: ARG002 + return SimpleNamespace(choices=[SimpleNamespace(message=SimpleNamespace(content=self._content))]) + + +class FakeChat: + def __init__(self, content: str): + self.completions = FakeCompletions(content) + + +class FakeGroq: + def __init__(self, api_key: str): + # Expose api_key for assertions via _next_client + self.api_key = api_key + self.chat = FakeChat("ok") + + +def test_next_client_rotates(monkeypatch): + # Patch Groq class to our fake + monkeypatch.setattr(gc, "Groq", FakeGroq) + client = gc.GroqRotatingClient(api_keys=["k1", "k2"], model="m") + + c1 = client._next_client() + c2 = client._next_client() + c3 = client._next_client() + assert getattr(c1, "api_key", None) == "k1" + assert getattr(c2, "api_key", None) == "k2" + assert getattr(c3, 
"api_key", None) == "k1" + + +def test_chat_success_path(monkeypatch): + monkeypatch.setattr(gc, "Groq", FakeGroq) + client = gc.GroqRotatingClient(api_keys=["kX"], model="m") + out = client.chat([ + {"role": "system", "content": "s"}, + {"role": "user", "content": "u"}, + ]) + assert out == "ok" + diff --git a/tests/parsers/test_text_extractor_pdf.py b/tests/parsers/test_text_extractor_pdf.py new file mode 100644 index 0000000..0abe108 --- /dev/null +++ b/tests/parsers/test_text_extractor_pdf.py @@ -0,0 +1,19 @@ +from pathlib import Path + +from PyPDF2 import PdfWriter + +from src.parsers.text_extractor import extract_text_from_pdf + + +def test_extract_text_from_pdf_blank(tmp_path: Path): + pdf_path = tmp_path / "blank.pdf" + writer = PdfWriter() + writer.add_blank_page(width=72, height=72) + with pdf_path.open("wb") as f: + writer.write(f) + + text = extract_text_from_pdf(pdf_path) + # Blank page yields empty string but exercises the code path + assert isinstance(text, str) + assert text == "" + diff --git a/ui/app.py b/ui/app.py new file mode 100644 index 0000000..4d1cbb2 --- /dev/null +++ b/ui/app.py @@ -0,0 +1,125 @@ +import json +from pathlib import Path + +import pandas as pd +import streamlit as st +from streamlit_autorefresh import st_autorefresh + + +st.set_page_config(page_title="LogCTI Dashboard", page_icon="🛡️", layout="wide") +st.title("Log + CTI Dashboard 🛡️") + + +@st.cache_data(show_spinner=False) +def load_jsonl(path: Path) -> pd.DataFrame: + rows = [] + for line in path.read_text(encoding="utf-8").splitlines(): + try: + rows.append(json.loads(line)) + except Exception: + continue + return pd.DataFrame(rows) + + +def tail_jsonl(path: Path, start_pos: int = 0, max_lines: int = 2000) -> tuple[list[dict], int]: + rows: list[dict] = [] + try: + with path.open("rb") as f: + f.seek(start_pos) + for i, line in enumerate(f): + if i > max_lines: + break + try: + rows.append(json.loads(line.decode("utf-8", errors="ignore"))) + except Exception: + continue 
+ pos = f.tell() + except FileNotFoundError: + return [], 0 + return rows, pos + + +def list_processed_files(base: Path) -> list[Path]: + if not base.exists(): + return [] + return sorted([p for p in base.glob("*.jsonl")], key=lambda p: p.stat().st_mtime, reverse=True) + + +col1, col2 = st.columns([2, 1]) +with col2: + base_dir = st.text_input("Processed dir", value=str(Path("data/processed").resolve())) + base_path = Path(base_dir) + files = list_processed_files(base_path) + file_names = [f.name for f in files] + selected = st.selectbox("Enriched file", options=file_names) if files else None + uploaded = st.file_uploader("...or upload enriched .jsonl", type=["jsonl"]) # optional + refresh_ms = st.slider("Auto-refresh (ms)", min_value=0, max_value=10000, step=500, value=2000, + help="Set to 0 to disable auto-refresh") + if refresh_ms > 0: + st_autorefresh(interval=refresh_ms, key="auto_refresh") + +df = pd.DataFrame() +if uploaded is not None: + df = pd.DataFrame([json.loads(l) for l in uploaded.getvalue().decode("utf-8").splitlines() if l.strip()]) +elif selected: + # Use tailing for scalability and near real-time updates + file_path = base_path / selected + if "_tail_pos" not in st.session_state or st.session_state.get("_tail_file") != str(file_path): + st.session_state["_tail_pos"] = 0 + st.session_state["_tail_file"] = str(file_path) + new_rows, new_pos = tail_jsonl(file_path, st.session_state["_tail_pos"], max_lines=5000) + st.session_state["_tail_pos"] = new_pos + df = pd.DataFrame(new_rows) if new_rows else load_jsonl(file_path) + +if df.empty: + st.info("Select or upload an enriched JSONL file to explore results.") + st.stop() + +# Metrics +total_requests = len(df) +unique_ips = df["ip"].nunique() if "ip" in df.columns else 0 +status_counts = df["status"].astype(str).value_counts() if "status" in df.columns else pd.Series(dtype=int) + +with col1: + m1, m2, m3 = st.columns(3) + m1.metric("Total requests", f"{total_requests}") + m2.metric("Unique IPs", 
f"{unique_ips}") + if not status_counts.empty: + m3.metric("Top status", f"{status_counts.index[0]}: {int(status_counts.iloc[0])}") + +st.subheader("Status distribution") +if not status_counts.empty: + st.bar_chart(status_counts) +else: + st.write("No status data available.") + +# Suspicious IPs table (if present in records as enriched by pipeline) +cols = [ + "ip", + "severity", + "status", + "path", + "ua", + "rationale", +] +present_cols = [c for c in cols if c in df.columns] +st.subheader("Latest enriched events (tail)") +st.dataframe(df[present_cols].tail(200), use_container_width=True) + +# Aggregate suspicious overview from records if they contain CTI annotations +cti_cols = [ + "ip", + "risk", + "abuse_confidence_score", + "total_reports", + "country", + "talos_reputation", + "vt_malicious", + "vt_suspicious", +] +present_cti = [c for c in cti_cols if c in df.columns] +if present_cti: + st.subheader("CTI Signals (per record view)") + st.dataframe(df[present_cti].dropna(how="all").tail(300), use_container_width=True) + +st.caption("Tip: generate enriched JSONL via `python -m src.cli --out data/processed`. The dashboard will auto-refresh.")