diff --git a/dissect/target/plugins/apps/webserver/nginx.py b/dissect/target/plugins/apps/webserver/nginx.py index c38484be61..1f72de2b79 100644 --- a/dissect/target/plugins/apps/webserver/nginx.py +++ b/dissect/target/plugins/apps/webserver/nginx.py @@ -1,97 +1,350 @@ +from __future__ import annotations + +import json import re from datetime import datetime from pathlib import Path from typing import Iterator -from dissect.target import plugin -from dissect.target.exceptions import FileNotFoundError, UnsupportedPluginError +from dissect.target.exceptions import UnsupportedPluginError from dissect.target.helpers.fsutil import open_decompress +from dissect.target.plugin import export from dissect.target.plugins.apps.webserver.webserver import ( WebserverAccessLogRecord, + WebserverErrorLogRecord, + WebserverHostRecord, WebserverPlugin, ) from dissect.target.target import Target -LOG_REGEX = re.compile( - r'(?P.*?) - (?P.*?) \[(?P\d{2}\/[A-Za-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} (\+|\-)\d{4})\] "(?P.*?) (?P.*?) ?(?PHTTP\/.*?)?" (?P\d{3}) (?P-|\d+) (["](?P(\-)|(.+))["]) "(?P.*?)"', # noqa: E501 +# Reference: https://nginx/org/en/docs/http/ngx_http_log_module.html#log_format +RE_ACCESS_LOG = re.compile( + r""" + (?P.*?)\s-\s(?P.*?) + \s + \[(?P\d{2}\/[A-Za-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2}\s(\+|\-)\d{4})\] + \s + \"(?P.*?)\s(?P.*?)\s?(?PHTTP\/.*?)?\" + \s + (?P\d{3}) + \s + (?P-|\d+) + \s + (["](?P(\-)|(.+))["]) + \s + \"(?P.*?)\" + """, + re.VERBOSE, ) +# Reference: https://github.com/nginx/nginx/blob/master/src/core/ngx_log.c +RE_ERROR_LOG = re.compile( + r""" + (?P\d{4}\/\d{2}\/\d{2}\s\d{2}:\d{2}:\d{2}) # YYYY/MM/DD HH:MM:SS + \s + \[(?P\S+)\] + \s + (?P\d+)\#(?P\d+)\: # 12345#12345: + \s + (?P.+) + """, + re.VERBOSE, +) + +# Reference: https://nginx.org/en/docs/http/ngx_http_log_module.html#access_log +RE_ACCESS_LOG_DIRECTIVE = re.compile( + r""" + (?:\#\s+?)? # optionally include disabled directives + access_log\s+ + (?P[^\s\;]+) + (?:\s(?P[^\s\;]+))? # capture format and ignore other arguments + """, + re.VERBOSE, +) + +# Reference: https://nginx.org/en/docs/ngx_core_module.html#error_log +RE_ERROR_LOG_DIRECTIVE = re.compile( + r""" + (?:\#\s+?)? # optionally include disabled directives + error_log\s+ + (?P[^\s\;]+) + (?:\s(?P[^\s\;]+))? # capture optional level + """, + re.VERBOSE, +) + +# Reference: https://nginx.org/en/docs/ngx_core_module.html#include +RE_INCLUDE_DIRECTIVE = re.compile(r"[\s#]*include\s+(?P[^\s\;]+)") + class NginxPlugin(WebserverPlugin): - """Nginx webserver plugin.""" + """NGINX webserver plugin.""" __namespace__ = "nginx" + DEFAULT_LOG_DIRS = [ + "/var/log/nginx", + "/var/log", + ] + + ACCESS_LOG_NAMES = ["access.log"] + ERROR_LOG_NAMES = ["error.log"] + + DEFAULT_CONFIG_PATHS = [ + "/etc/nginx/nginx.conf", + "/etc/nginx/sites-available/*.conf", + "/etc/nginx/sites-enabled/*.conf", + ] + def __init__(self, target: Target): super().__init__(target) - self.log_paths = self.get_log_paths() - def check_compatible(self) -> None: - if not len(self.log_paths): - raise UnsupportedPluginError("No NGINX directories found") + self.access_paths = set() + self.error_paths = set() + self.host_paths = set() + + self.find_logs() - def get_log_paths(self) -> list[Path]: - log_paths = [] + def check_compatible(self) -> None: + if not self.access_paths and not self.error_paths and not self.host_paths: + raise UnsupportedPluginError("No NGINX log or config files found on target") + def find_logs(self) -> None: # Add any well known default NGINX log locations - log_paths.extend(self.target.fs.path("/var/log/nginx").glob("access.log*")) + for log_dir in self.DEFAULT_LOG_DIRS: + log_dir = self.target.fs.path(log_dir) + for log_name in self.ACCESS_LOG_NAMES: + self.access_paths.update(log_dir.glob(f"{log_name}*")) + for log_name in self.ERROR_LOG_NAMES: + self.error_paths.update(log_dir.glob(f"{log_name}*")) # Check for custom paths in NGINX install config - if (config_file := self.target.fs.path("/etc/nginx/nginx.conf")).exists(): - for line in config_file.open("rt"): - line = line.strip() - if not line or "access_log " not in line: - continue + for config_file in self.DEFAULT_CONFIG_PATHS: + if "*" in config_file: + base, _, glob = config_file.partition("*") + for f in self.target.fs.path(base).rglob(f"*{glob}"): + self.parse_config(f) - try: - line = line.split("access_log")[1].strip() - log_path = self.target.fs.path(line.split()[0]) - log_paths.extend( - path for path in log_path.parent.glob(f"{log_path.name}*") if path not in log_paths - ) - except IndexError: - self.target.log.warning("Unexpected NGINX log configuration: %s (%s)", line, config_file) + elif (config_file := self.target.fs.path(config_file)).exists(): + self.parse_config(config_file) + + def parse_config(self, path: Path, seen: set[Path] | None = None) -> None: + """Parse the given NGINX ``.conf`` file for ``access_log``, ``error_log`` and ``include`` directives.""" - return log_paths + seen = set() if seen is None else seen - @plugin.export(record=WebserverAccessLogRecord) + if path in seen: + self.target.log.warning("Detected recursion in NGINX configuration, file already parsed: %s", path) + return + + seen.add(path) + + if not path.is_file(): + self.target.log.warning("File %s does not exist on target", path) + return + + for line in path.open("rt"): + if not (line := line.strip()): + continue + + if "access_log " in line: + if access_log := RE_ACCESS_LOG_DIRECTIVE.search(line): + access_log = self.target.fs.path(access_log["path"]) + self.access_paths.update(access_log.parent.glob(f"{access_log.name}*")) + else: + self.target.log.warning("Unable to parse nginx access_log line %r in %s", line, path) + + elif "error_log " in line: + if error_log := RE_ERROR_LOG_DIRECTIVE.search(line): + error_log = self.target.fs.path(error_log["path"]) + self.error_paths.update(error_log.parent.glob(f"{error_log.name}*")) + else: + self.target.log.warning("Unable to parse NGINX error_log line %r in %s", line, path) + + elif "server {" in line: + self.host_paths.add(path) + + elif "include " in line: + if match := RE_INCLUDE_DIRECTIVE.search(line): + path_str: str = match.groupdict().get("path") + + if "*" in path_str: + base, _, glob = path_str.partition("*") + include_paths = self.target.fs.path(base).rglob(f"*{glob}") + else: + include_paths = [self.target.fs.path(path_str)] + + for include_path in include_paths: + if include_path.is_absolute(): + self.parse_config(include_path) + else: + include_path = self.target.fs.path(path.parent).joinpath(include_path) + self.parse_config(include_path) + else: + self.target.log.warning("Unable to parse NGINX include line %r in %s", line, path) + + @export(record=WebserverAccessLogRecord) def access(self) -> Iterator[WebserverAccessLogRecord]: - """Return contents of NGINX access log files in unified WebserverAccessLogRecord format. + """Return contents of NGINX access log files in unified ``WebserverAccessLogRecord`` format. References: - https://docs.nginx.com/nginx/admin-guide/monitoring/logging/#access_log - http://nginx.org/en/docs/http/ngx_http_log_module.html#log_format """ - for path in self.log_paths: - try: - path = path.resolve(strict=True) - for line in open_decompress(path, "rt"): - line = line.strip() - if not line: - continue + for path in self.access_paths: + path = path.resolve(strict=True) + if not path.is_file(): + self.target.log.warning("NGINX log file configured but could not be found (dead symlink?): %s", path) + continue + + for line in open_decompress(path, "rt"): + if not (line := line.strip()): + continue + + log: dict[str, str] = {} - match = LOG_REGEX.match(line) - if not match: - self.target.log.warning("Could not match NGINX regex format for log line: %s (%s)", line, path) + if line[0:2] == '{"': + try: + log = parse_json_line(line) + except ValueError as e: + self.target.log.warning("Could not parse NGINX JSON log line %r in %s", line, path) + self.target.log.debug("", exc_info=e) continue + elif match := RE_ACCESS_LOG.search(line): log = match.groupdict() - yield WebserverAccessLogRecord( - ts=datetime.strptime(log["datetime"], "%d/%b/%Y:%H:%M:%S %z"), - remote_ip=log["remote_ip"], - remote_user=log["remote_user"], - method=log["method"], - uri=log["uri"], - protocol=log["protocol"], - status_code=log["status_code"], - bytes_sent=log["bytes_sent"].strip("-") or 0, - referer=log["referer"], - useragent=log["useragent"], - source=path, - _target=self.target, - ) - except FileNotFoundError: - self.target.log.warning("NGINX log file configured but could not be found (dead symlink?): %s", path) - except Exception as e: - self.target.log.warning("An error occured parsing NGINX log file %s: %s", path, str(e)) - self.target.log.debug("", exc_info=e) + + else: + self.target.log.warning("Could not match NGINX format for log line %r in %s", line, path) + continue + + ts = None + bytes_sent = None + + try: + ts = datetime.strptime(log["datetime"], "%d/%b/%Y:%H:%M:%S %z") + bytes_sent = log["bytes_sent"].strip("-") or 0 + except ValueError: + pass + + log.pop("datetime") + log.pop("bytes_sent") + + yield WebserverAccessLogRecord( + ts=ts, + bytes_sent=bytes_sent, + **log, + source=path, + _target=self.target, + ) + + def error(self) -> Iterator[WebserverErrorLogRecord]: + """Return contents of NGINX error log files in unified ``WebserverErrorLogRecord`` format. + + Resources: + - https://nginx.org/en/docs/ngx_core_module.html#error_log + - https://github.com/nginx/nginx/blob/master/src/core/ngx_log.c + """ + for path in self.error_paths: + path = path.resolve(strict=True) + if not path.is_file(): + self.target.log.warning("File not found: %s", path) + continue + + for line in open_decompress(path, "rt"): + if not (line := line.strip()): + continue + + if not (match := RE_ERROR_LOG.search(line)): + self.target.log.warning("Unable to match NGINX error log message %r in %s", line, path) + continue + + log = match.groupdict() + + try: + ts = datetime.strptime(log["ts"], "%Y/%m/%d %H:%M:%S") + except ValueError: + ts = None + + log.pop("ts") + log.pop("tid") + + yield WebserverErrorLogRecord( + ts=ts, + **log, + source=path, + _target=self.target, + ) + + def hosts(self) -> Iterator[WebserverHostRecord]: + """Return found server directives in the NGINX configuration. + + Resources: + - https://nginx.org/en/docs/http/ngx_http_core_module.html#server + """ + + def yield_record(current_server: dict) -> Iterator[WebserverHostRecord]: + yield WebserverHostRecord( + ts=host_path.lstat().st_mtime, + server_name=current_server.get("server_name") or current_server.get("listen"), + server_port=current_server.get("listen"), + root_path=current_server.get("root"), + access_log_config=current_server.get("access_log"), + error_log_config=current_server.get("error_log"), + source=host_path, + _target=self.target, + ) + + for host_path in self.host_paths: + current_server = {} + seen_server_directive = False + for line in host_path.open("rt"): + if "server {" in line: + if current_server: + yield from yield_record(current_server) + current_server = {} + seen_server_directive = True + + elif seen_server_directive: + key, _, value = line.strip().partition(" ") + current_server[key] = value.rstrip(";") + + if current_server: + yield from yield_record(current_server) + + +def parse_json_line(line: str) -> dict[str, str] | None: + """Attempt to parse a default NGINX JSON log line. + + We assume the custom ``log_format`` uses the following default NGINX field names:: + + time_local, time, remote_addr, remote_ip, remote_user, request_method, request, + response, status, body_bytes_sent, request_time, http_referrer, referrer, + http_user_agent, agent + + Unfortunately NGINX has no official default naming convention for JSON access logs, + users can configure the JSON ``log_format`` as they see fit. + + Resources: + - https://nginx.org/en/docs/http/ngx_http_log_module.html + - https://github.com/elastic/examples/blob/master/Common%20Data%20Formats/nginx_json_logs/README.md + """ + + try: + json_log = json.loads(line) + return { + "datetime": json_log.get("time_local") or json_log.get("time"), + "remote_ip": json_log.get("remote_addr") or json_log.get("remote_ip"), + "remote_user": json_log.get("remote_user"), + "method": json_log.get("request_method"), + "uri": json_log.get("request_uri") or json_log.get("request"), + "status": json_log.get("status") or json_log.get("response"), + "status_code": json_log.get("status") or json_log.get("response"), + "referer": json_log.get("http_referrer") or json_log.get("referrer"), + "useragent": json_log.get("http_user_agent") or json_log.get("agent"), + "response_time_ms": json_log.get("request_time"), + "bytes_sent": json_log.get("body_bytes_sent") or json_log.get("bytes"), + } + + except json.JSONDecodeError as e: + raise ValueError(f"Could not parse NGINX log line {line!r}: {e}") from e diff --git a/tests/plugins/apps/webserver/test_nginx.py b/tests/plugins/apps/webserver/test_nginx.py index 646fb2648f..09aff9c97e 100644 --- a/tests/plugins/apps/webserver/test_nginx.py +++ b/tests/plugins/apps/webserver/test_nginx.py @@ -8,7 +8,7 @@ from tests._utils import absolute_path -def test_plugins_apps_webservers_nginx_txt(target_unix: Target, fs_unix: VirtualFilesystem) -> None: +def test_nginx_txt(target_unix: Target, fs_unix: VirtualFilesystem) -> None: data_file = absolute_path("_data/plugins/apps/webserver/nginx/access.log") fs_unix.map_file("var/log/nginx/access.log", data_file) @@ -28,7 +28,7 @@ def test_plugins_apps_webservers_nginx_txt(target_unix: Target, fs_unix: Virtual assert record.bytes_sent == 123 -def test_plugins_apps_webservers_nginx_ipv6(target_unix: Target, fs_unix: VirtualFilesystem) -> None: +def test_nginx_ipv6(target_unix: Target, fs_unix: VirtualFilesystem) -> None: data_file = absolute_path("_data/plugins/apps/webserver/nginx/access.log") fs_unix.map_file("var/log/nginx/access.log", data_file) @@ -47,7 +47,7 @@ def test_plugins_apps_webservers_nginx_ipv6(target_unix: Target, fs_unix: Virtua assert record.bytes_sent == 123 -def test_plugins_apps_webservers_nginx_gz(target_unix: Target, fs_unix: VirtualFilesystem) -> None: +def test_nginx_gz(target_unix: Target, fs_unix: VirtualFilesystem) -> None: data_file = absolute_path("_data/plugins/apps/webserver/nginx/access.log.gz") fs_unix.map_file("var/log/nginx/access.log.1.gz", data_file) @@ -66,7 +66,7 @@ def test_plugins_apps_webservers_nginx_gz(target_unix: Target, fs_unix: VirtualF assert record.bytes_sent == 123 -def test_plugins_apps_webservers_nginx_bz2(target_unix: Target, fs_unix: VirtualFilesystem) -> None: +def test_nginx_bz2(target_unix: Target, fs_unix: VirtualFilesystem) -> None: data_file = absolute_path("_data/plugins/apps/webserver/nginx/access.log.bz2") fs_unix.map_file("var/log/nginx/access.log.1.bz2", data_file) @@ -85,19 +85,20 @@ def test_plugins_apps_webservers_nginx_bz2(target_unix: Target, fs_unix: Virtual assert record.bytes_sent == 123 -def test_plugins_apps_webservers_nginx_config(target_unix: Target, fs_unix: VirtualFilesystem) -> None: +def test_nginx_config(target_unix: Target, fs_unix: VirtualFilesystem) -> None: config_file = absolute_path("_data/plugins/apps/webserver/nginx/nginx.conf") fs_unix.map_file("etc/nginx/nginx.conf", config_file) for i, log in enumerate(["access.log", "domain1.access.log", "domain2.access.log", "big.server.access.log"]): fs_unix.map_file_fh(f"opt/logs/{i}/{log}", BytesIO(b"Foo")) - log_paths = NginxPlugin(target_unix).get_log_paths() + target_unix.add_plugin(NginxPlugin) - assert len(log_paths) == 4 + assert len(target_unix.nginx.access_paths) == 4 + assert len(target_unix.nginx.error_paths) == 0 -def test_plugins_apps_webservers_nginx_config_logs_logrotated(target_unix: Target, fs_unix: VirtualFilesystem) -> None: +def test_nginx_config_logs_logrotated(target_unix: Target, fs_unix: VirtualFilesystem) -> None: config_file = absolute_path("_data/plugins/apps/webserver/nginx/nginx.conf") fs_unix.map_file("etc/nginx/nginx.conf", config_file) fs_unix.map_file_fh("opt/logs/0/access.log", BytesIO(b"Foo1")) @@ -106,20 +107,132 @@ def test_plugins_apps_webservers_nginx_config_logs_logrotated(target_unix: Targe fs_unix.map_file_fh("opt/logs/1/domain1.access.log", BytesIO(b"Foo4")) fs_unix.map_file_fh("var/log/nginx/access.log", BytesIO(b"Foo5")) - log_paths = NginxPlugin(target_unix).get_log_paths() + target_unix.add_plugin(NginxPlugin) - assert len(log_paths) == 5 + assert len(target_unix.nginx.access_paths) == 5 + assert len(target_unix.nginx.error_paths) == 0 -def test_plugins_apps_webservers_nginx_config_commented_logs(target_unix: Target, fs_unix: VirtualFilesystem) -> None: +def test_nginx_config_commented_logs(target_unix: Target, fs_unix: VirtualFilesystem) -> None: config = """ # access_log /foo/bar/old.log main; access_log /foo/bar/new.log main; + + # error_log /foo/bar/error/old.log warn; + error_log /foo/bar/error/new.log; """ fs_unix.map_file_fh("etc/nginx/nginx.conf", BytesIO(textwrap.dedent(config).encode())) fs_unix.map_file_fh("foo/bar/new.log", BytesIO(b"New")) fs_unix.map_file_fh("foo/bar/old.log", BytesIO(b"Old")) + fs_unix.map_file_fh("foo/bar/error/new.log", BytesIO(b"")) + fs_unix.map_file_fh("foo/bar/error/old.log", BytesIO(b"")) + + target_unix.add_plugin(NginxPlugin) + + assert len(target_unix.nginx.access_paths) == 2 + assert len(target_unix.nginx.error_paths) == 2 + + assert sorted(list(map(str, target_unix.nginx.access_paths))) == ["/foo/bar/new.log", "/foo/bar/old.log"] + assert sorted(list(map(str, target_unix.nginx.error_paths))) == ["/foo/bar/error/new.log", "/foo/bar/error/old.log"] + + +def test_nginx_error_logs(target_unix: Target, fs_unix: VirtualFilesystem) -> None: + """test if we detect and parse nginx error logs correctly.""" + + errors = """ + 2025/01/31 13:37:01 [alert] 12345#12345: this is a message + 2025/01/31 13:37:02 [alert] 12345#12345: this is another message + 2025/01/31 13:37:03 [alert] 12345#12345: and a third message! + """ + fs_unix.map_file_fh("var/log/nginx/error.log", BytesIO(textwrap.dedent(errors).encode())) + + target_unix.add_plugin(NginxPlugin) + records = list(target_unix.nginx.error()) + + assert len(records) == 3 + + assert records[0].ts == datetime(2025, 1, 31, 13, 37, 1, tzinfo=timezone.utc) + assert records[0].level == "alert" + assert records[0].message == "this is a message" + assert records[0].source == "/var/log/nginx/error.log" + + +def test_nginx_parse_config(target_unix: Target, fs_unix: VirtualFilesystem) -> None: + """test if we parse config files and their include directives correctly.""" + + base_conf = """ + user www www; + server { + listen 1337; + server_name example; + index index.html; + root /var/www/html; + include some.conf; + access_log /some/access.log; + } + include /more/confs/*.conf; + """ + fs_unix.map_file_fh("/etc/nginx/nginx.conf", BytesIO(textwrap.dedent(base_conf).encode())) + + some_conf = """ + error_log /some/error.log; + """ + fs_unix.map_file_fh("/etc/nginx/some.conf", BytesIO(textwrap.dedent(some_conf).encode())) + + more_confs_one = """ + server { + listen 80; + server_name eighty; + index index.html; + root /var/www/eighty; + access_log /eighty/access.log; + include /bla/foo.conf; + } + """ + fs_unix.map_file_fh("/more/confs/one.conf", BytesIO(textwrap.dedent(more_confs_one).encode())) + + foo_conf = """ + error_log /eighty/error.log; + """ + fs_unix.map_file_fh("/bla/foo.conf", BytesIO(textwrap.dedent(foo_conf).encode())) + + fs_unix.map_file_fh("/some/access.log", BytesIO(b"")) + fs_unix.map_file_fh("/some/error.log", BytesIO(b"")) + fs_unix.map_file_fh("/eighty/access.log.1", BytesIO(b"")) + fs_unix.map_file_fh("/eighty/error.log.1", BytesIO(b"")) + + target_unix.add_plugin(NginxPlugin) - log_paths = NginxPlugin(target_unix).get_log_paths() - assert str(log_paths[0]) == "/foo/bar/old.log" - assert str(log_paths[1]) == "/foo/bar/new.log" + assert sorted(list(map(str, target_unix.nginx.access_paths))) == [ + "/eighty/access.log.1", + "/some/access.log", + ] + assert sorted(list(map(str, target_unix.nginx.error_paths))) == [ + "/eighty/error.log.1", + "/some/error.log", + ] + + assert sorted(list(map(str, target_unix.nginx.host_paths))) == [ + "/etc/nginx/nginx.conf", + "/more/confs/one.conf", + ] + + records = sorted(list(target_unix.nginx.hosts()), key=lambda r: r.source) + + assert len(records) == 2 + + assert records[0].ts + assert records[0].server_name == "example" + assert records[0].server_port == 1337 + assert records[0].root_path == "/var/www/html" + assert records[0].access_log_config == "/some/access.log" + assert not records[0].error_log_config + assert records[0].source == "/etc/nginx/nginx.conf" + + assert records[1].ts + assert records[1].server_name == "eighty" + assert records[1].server_port == 80 + assert records[1].root_path == "/var/www/eighty" + assert records[1].access_log_config == "/eighty/access.log" + assert not records[1].error_log_config + assert records[1].source == "/more/confs/one.conf"