diff --git a/dissect/target/plugins/os/windows/generic.py b/dissect/target/plugins/os/windows/generic.py
index 673a29f67d..d200b865f6 100644
--- a/dissect/target/plugins/os/windows/generic.py
+++ b/dissect/target/plugins/os/windows/generic.py
@@ -578,8 +578,9 @@ def machine_sid(self) -> Iterator[ComputerSidRecord]:
                 sid=f"S-1-5-21-{sid[0]}-{sid[1]}-{sid[2]}",
                 _target=self.target,
             )
-        except (RegistryError, struct.error):
-            self.target.log.exception("Cannot read machine SID from registry")
+        except (RegistryError, struct.error) as e:
+            self.target.log.error("Cannot read machine SID from registry")  # noqa: TRY400
+            self.target.log.debug("", exc_info=e)
             return None
 
     @export(record=ComputerSidRecord)
diff --git a/dissect/target/plugins/os/windows/search.py b/dissect/target/plugins/os/windows/search.py
new file mode 100644
index 0000000000..5208e9f9d8
--- /dev/null
+++ b/dissect/target/plugins/os/windows/search.py
@@ -0,0 +1,310 @@
+from __future__ import annotations
+
+import re
+import urllib.parse
+from typing import TYPE_CHECKING, Any, Union, get_args
+
+from dissect.esedb import EseDB
+from dissect.sql import SQLite3
+from dissect.util.ts import wintimestamp
+
+from dissect.target.exceptions import FilesystemError, UnsupportedPluginError
+from dissect.target.helpers.record import TargetRecordDescriptor
+from dissect.target.plugin import Plugin, export
+from dissect.target.plugins.apps.browser.browser import BrowserHistoryRecord
+from dissect.target.plugins.apps.browser.edge import EdgePlugin
+from dissect.target.plugins.apps.browser.iexplore import InternetExplorerPlugin
+
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+    from pathlib import Path
+
+    from dissect.esedb.record import Record as EseDBRecord
+    from dissect.esedb.table import Table as EseDBTable
+
+    from dissect.target.plugins.general.users import UserDetails
+    from dissect.target.target import Target
+
+SearchIndexRecord = TargetRecordDescriptor(
+    "windows/search/index/entry",
+    [
+        ("datetime", "ts"),
+        ("datetime", "ts_mtime"),
+        ("datetime", "ts_btime"),
+        ("datetime", "ts_atime"),
+        ("path", "path"),
+        ("string", "type"),
+        ("filesize", "size"),
+        ("string", "data"),
+        ("path", "source"),
+    ],
+)
+
+SearchIndexActivityRecord = TargetRecordDescriptor(
+    "windows/search/index/activity",
+    [
+        ("datetime", "ts_start"),
+        ("datetime", "ts_end"),
+        ("varint", "duration"),
+        ("string", "application_name"),
+        ("string", "application_id"),
+        ("string", "activity_id"),
+        ("path", "source"),
+    ],
+)
+
+# Splits a System_ItemUrl value shaped like ``<store>://{<sid>}/<url>`` into its three parts. The ``browser`` group
+# is looked up in BROWSER_RECORD_MAP and the ``sid`` group is used to find the owning user.
+RE_URL = re.compile(r"(?P<browser>.+)\:\/\/\{(?P<sid>.+)\}\/(?P<url>.+)$")
+
+BROWSER_RECORD_MAP = {
+    "iehistory": InternetExplorerPlugin.BrowserHistoryRecord,
+    "winrt": EdgePlugin.BrowserHistoryRecord,
+}
+
+SearchIndexRecords = Union[SearchIndexRecord, SearchIndexActivityRecord, BrowserHistoryRecord]  # noqa: UP007
+
+
+class SearchIndexPlugin(Plugin):
+    """Windows Search Index plugin."""
+
+    SYSTEM_PATHS = (
+        # Windows 11 22H2 (SQLite3)
+        "sysvol/ProgramData/Microsoft/Search/Data/Applications/Windows/Windows.db",
+        # Windows Vista and Windows 10 (EseDB)
+        "sysvol/ProgramData/Microsoft/Search/Data/Applications/Windows/Windows.edb",
+        # Windows XP (EseDB)
+        "sysvol/Documents and Settings/All Users/Application Data/Microsoft/Search/Data/Applications/Windows/Windows.edb",  # noqa: E501
+    )
+
+    USER_PATHS = (
+        # Windows 10 Server Roaming (EseDB / SQLite)
+        "AppData/Roaming/Microsoft/Search/Data/Applications/S-1-*/*.*db",
+    )
+
+    def __init__(self, target: Target):
+        super().__init__(target)
+        self.databases = set(self.find_databases())
+
+    def find_databases(self) -> Iterator[tuple[Path, UserDetails | None]]:
+        """Yield ``(path, user_details)`` for every Windows Search database file found on the target.
+
+        Databases found via ``SYSTEM_PATHS`` are yielded with ``None`` as user details. Candidates that ``samefile``
+        reports equal to an earlier hit are skipped; if that check raises ``FilesystemError`` they are kept.
+        """
+        seen = set()
+
+        for system_path in self.SYSTEM_PATHS:
+            if (path := self.target.fs.path(system_path)).is_file():
+                try:
+                    if any(seen_file.samefile(path) for seen_file in seen):
+                        continue
+                except FilesystemError:
+                    pass
+
+                seen.add(path)
+                yield path.resolve(), None
+
+        for user_details in self.target.user_details.all_with_home():
+            for user_path in self.USER_PATHS:
+                for path in user_details.home_path.glob(user_path):
+                    if not path.is_file():
+                        continue
+                    try:
+                        if any(seen_file.samefile(path) for seen_file in seen):
+                            continue
+                    except FilesystemError:
+                        pass
+
+                    seen.add(path)
+                    yield path.resolve(), user_details
+
+    def check_compatible(self) -> None:
+        """Raise ``UnsupportedPluginError`` when no search database was found during initialisation."""
+        if not self.databases:
+            raise UnsupportedPluginError("No Windows Search Index database files found on target")
+
+    @export(record=get_args(SearchIndexRecords))
+    def search(self) -> Iterator[SearchIndexRecords]:
+        """Yield Windows Search Index records.
+
+        Parses ``Windows.edb`` EseDB and ``Windows.db`` SQLite3 databases. Currently does not parse
+        ``GatherLogs/SystemIndex/SystemIndex.*.(Crwl|gthr)`` files or ``Windows-gather.db`` and ``Windows-usn.db`` files.
+
+        Windows Search is a standard component of Windows 7 and Windows Vista, and is enabled by default. The standard (non-Windows Server)
+        configuration of Windows Search indexes the following paths: ``C:\\Users\\*`` and ``C:\\ProgramData\\Microsoft\\Windows\\Start Menu\\Programs\\*``,
+        with some exceptions for certain file extensions (see the linked references for more information).
+
+        The difference between the fields ``System_Date*`` and ``System_Document_Date*`` should be researched further.
+        It is unclear what the field ``InvertedOnlyMD5`` is a checksum of (record or file content?). It might be possible
+        to correlate the field ``System_FileOwner`` with a ``UserRecordDescriptor``. The field ``System_FileAttributes`` should be
+        investigated further.
+
+        No test data available for indexed Outlook emails, this plugin might not be able to handle indexed email messages.
+
+        References:
+            - https://learn.microsoft.com/en-us/windows/win32/search/-search-3x-wds-overview
+            - https://github.com/libyal/esedb-kb/blob/main/documentation/Windows%20Search.asciidoc
+            - https://www.aon.com/en/insights/cyber-labs/windows-search-index-the-forensic-artifact-youve-been-searching-for
+            - https://github.com/strozfriedberg/sidr
+            - https://devblogs.microsoft.com/windows-search-platform/configuration-and-settings/
+            - https://learn.microsoft.com/en-us/windows/win32/search/-search-3x-wds-included-in-index
+        """  # noqa: E501
+
+        for db_path, user_details in self.databases:
+            if db_path.suffix == ".edb":
+                yield from self.parse_esedb(db_path, user_details)
+
+            elif db_path.suffix == ".db":
+                yield from self.parse_sqlite(db_path, user_details)
+
+            else:
+                self.target.log.warning("Unknown Windows Search Index database file %r", db_path)
+
+    def parse_esedb(self, path: Path, user_details: UserDetails | None) -> Iterator[SearchIndexRecords]:
+        """Parse the EseDB ``SystemIndex_PropertyStore`` table."""
+
+        with path.open("rb") as fh:
+            db = EseDB(fh)
+            table = db.table("SystemIndex_PropertyStore")
+
+            for record in table.records():
+                yield from self.build_record(TableRecord(table, record), user_details, path)
+
+    def parse_sqlite(self, path: Path, user_details: UserDetails | None) -> Iterator[SearchIndexRecords]:
+        """Parse the SQLite3 ``SystemIndex_1_PropertyStore`` table."""
+
+        with path.open("rb") as fh:
+            db = SQLite3(fh)
+
+            # ``ColumnId`` is translated using the ``SystemIndex_1_PropertyStore_Metadata`` table.
+            columns = {
+                row.get("Id"): row.get("UniqueKey", "").split("-", maxsplit=1)[-1]
+                for row in db.table("SystemIndex_1_PropertyStore_Metadata").rows()
+            }
+
+            if not (table := db.table("SystemIndex_1_PropertyStore")):
+                self.target.log.warning("Database %s does not have a table called 'SystemIndex_1_PropertyStore'", path)
+                return
+
+            current_work_id = None
+            values = {}
+
+            # Each row holds a single property value; consecutive rows sharing a ``WorkId`` form one record.
+            # NOTE(review): assumes rows of one ``WorkId`` are adjacent in the table -- TODO confirm.
+            for row in table.rows():
+                work_id = row.get("WorkId")
+                if current_work_id is None:
+                    current_work_id = work_id
+                if work_id != current_work_id:
+                    yield from self.build_record(values, user_details, path)
+                    current_work_id = work_id
+                    values = {}
+
+                if value := row.get("Value"):
+                    column_name = columns[row.get("ColumnId")]
+                    values[column_name] = value
+
+            yield from self.build_record(values, user_details, path)
+
+    def build_record(
+        self, values: dict[str, Any] | TableRecord, user_details: UserDetails | None, db_path: Path
+    ) -> Iterator[SearchIndexRecords]:
+        """Build a ``SearchIndexRecord``, ``SearchIndexActivityRecord`` or ``HistoryRecord``."""
+
+        if values.get("System_ItemType") == "ActivityHistoryItem":
+            yield SearchIndexActivityRecord(
+                ts_start=wintimestamp(int.from_bytes(values.get("System_ActivityHistory_StartTime", b""), "little")),
+                ts_end=wintimestamp(int.from_bytes(values.get("System_ActivityHistory_EndTime", b""), "little")),
+                duration=int.from_bytes(values.get("System_ActivityHistory_ActiveDuration", b""), "little"),
+                application_name=values.get("System_Activity_AppDisplayName"),
+                application_id=values.get("System_ActivityHistory_AppId"),
+                activity_id=values.get("System_ActivityHistory_AppActivityId"),
+                source=db_path,
+                _target=self.target,
+            )
+
+        elif values.get("System_Search_Store") in ("iehistory", "winrt"):
+            system_itemurl = values.get("System_ItemUrl")
+
+            if not system_itemurl or not (match := RE_URL.match(system_itemurl)):
+                self.target.log.warning(
+                    "Unable to parse System_ItemUrl: %r (%r) in %s", system_itemurl, values, db_path
+                )
+                return
+
+            browser, sid, url = match.group("browser", "sid", "url")
+
+            if not (CurrentBrowserHistoryRecord := BROWSER_RECORD_MAP.get(browser)):
+                self.target.log.warning(
+                    "Unable to determine browser history type for %r (%r) in %s", browser, system_itemurl, db_path
+                )
+                return
+
+            user = None
+            if sid and (sid_user_details := self.target.user_details.find(sid)):
+                user = sid_user_details.user
+
+            if not user and user_details:
+                user = user_details.user
+
+            url = values.get("System_Link_TargetUrl") or url
+            host = None
+
+            if url:
+                try:
+                    host = urllib.parse.urlparse(url).hostname
+                except Exception as e:
+                    self.target.log.debug("Unable to parse hostname from URL %r", url, exc_info=e)
+
+            yield CurrentBrowserHistoryRecord(
+                ts=wintimestamp(int.from_bytes(values.get("System_Link_DateVisited", b""), "little")),
+                browser=browser,
+                url=url,
+                title=values.get("System_Title"),
+                host=host,
+                source=db_path,
+                _user=user,
+                _target=self.target,
+            )
+
+        # System_Search_Store = "file"
+        else:
+            yield SearchIndexRecord(
+                ts=wintimestamp(int.from_bytes(values.get("System_Search_GatherTime", b""), "little")),
+                ts_mtime=wintimestamp(int.from_bytes(values.get("System_DateModified", b""), "little")),
+                ts_btime=wintimestamp(int.from_bytes(values.get("System_DateCreated", b""), "little")),
+                ts_atime=wintimestamp(int.from_bytes(values.get("System_DateAccessed", b""), "little")),
+                path=values.get("System_ItemPathDisplay"),
+                type=values.get("System_MIMEType")
+                or values.get("System_ContentType")
+                or values.get("System_ItemTypeText"),
+                size=int.from_bytes(b_size, "little") if (b_size := values.get("System_Size")) else None,
+                data=values.get("System_Search_AutoSummary"),
+                source=db_path,
+                _target=self.target,
+            )
+
+
+class TableRecord:
+    """Wrap an EseDB record so columns can be read with ``get`` using their name without the prefix."""
+
+    def __init__(self, table: EseDBTable, record: EseDBRecord):
+        self.table = table
+        self.record = record
+
+        # Translates e.g. ``System_DateModified`` to ``15F-System_DateModified`` as these column name prefixes might
+        # be dynamic based on the system version.
+        self.columns = {col.split("-", maxsplit=1)[-1]: col for col in table.column_names}
+
+    def get(self, key: str, default: Any | None = None) -> Any:
+        """Return the value of column ``key`` (given without its prefix) for the wrapped record.
+
+        ``default`` is returned when the table has no matching column or the record yields ``None`` for it, so
+        callers always get their fallback when no value is available.
+        """
+        if (column := self.columns.get(key)) is None:
+            return default
+
+        value = self.record.get(column)
+        return default if value is None else value
diff --git a/pyproject.toml b/pyproject.toml
index 7f394f4985..01d135b966 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,7 +53,7 @@ full = [
     "dissect.cim>=3,<4",
     "dissect.clfs>=1,<2",
     "dissect.cramfs>=1,<2",
-    "dissect.esedb>=3,<4",
+    "dissect.esedb>=3.17.dev1,<4",  # TODO: update on release!
     "dissect.etl>=3,<4",
     "dissect.extfs>=3,<4",
     "dissect.fat>=3,<4",
diff --git a/tests/_data/plugins/os/windows/search/Windows.db b/tests/_data/plugins/os/windows/search/Windows.db
new file mode 100644
index 0000000000..750ac91c90
--- /dev/null
+++ b/tests/_data/plugins/os/windows/search/Windows.db
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e655a1af9eb3386ffdc7e19aa8dcda06dfa2c35a1c3b657ac4a9c1c13c83f020
+size 2957312
diff --git a/tests/_data/plugins/os/windows/search/Windows.edb b/tests/_data/plugins/os/windows/search/Windows.edb
new file mode 100644
index 0000000000..8d307e9520
--- /dev/null
+++ b/tests/_data/plugins/os/windows/search/Windows.edb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10dd5fc05c2d19aa1fa4a705142e413fc5a4af17ae8e5e4909164262f9de7c66
+size 33554432
diff --git a/tests/plugins/os/windows/test_search.py b/tests/plugins/os/windows/test_search.py
new file mode 100644
index 0000000000..a427e4e41b
--- /dev/null
+++ b/tests/plugins/os/windows/test_search.py
@@ -0,0 +1,175 @@
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import TYPE_CHECKING
+
+from dissect.target.plugins.os.windows.search import SearchIndexPlugin
+from tests._utils import absolute_path
+from tests.conftest import add_win_user
+
+if TYPE_CHECKING:
+    from dissect.target.filesystem import VirtualFilesystem
+    from dissect.target.helpers.regutil import VirtualHive
+    from dissect.target.target import Target
+
+
+def test_windows_search_esedb(
+    target_win: Target, fs_win: VirtualFilesystem, hive_hklm: VirtualHive, hive_hku: VirtualHive
+) -> None:
+    """Test Windows Search EseDB parsing.
+
+    Resources:
+        - https://github.com/strozfriedberg/sidr/tree/main/tests/testdata
+    """
+
+    user_sid = "S-1-5-21-29705265-400737687-482427116-1001"
+
+    add_win_user(
+        hive_hklm,
+        hive_hku,
+        target_win,
+        sid=user_sid,
+        home="C:\\Users\\User",
+    )
+
+    fs_win.map_file(
+        "ProgramData/Microsoft/Search/Data/Applications/Windows/Windows.edb",
+        str(absolute_path("_data/plugins/os/windows/search/Windows.edb")),
+    )
+
+    fs_win.map_file(
+        f"Users/User/AppData/Roaming/Microsoft/Search/Data/Applications/{user_sid}/Windows.edb",
+        str(absolute_path("_data/plugins/os/windows/search/Windows.edb")),
+    )
+
+    plugin = target_win.add_plugin(SearchIndexPlugin)
+    records = list(target_win.search())
+
+    # Test find_databases deduplication
+    assert len(plugin.databases) == 1
+
+    len_records = len(records)
+    assert len_records == 1183 - 2  # Database contains two empty records.
+
+    # SearchIndexActivityRecord
+    assert records[-1].ts_start == datetime(2023, 2, 16, 17, 30, 35, tzinfo=timezone.utc)
+    assert records[-1].ts_end == datetime(2023, 2, 16, 17, 30, 37, tzinfo=timezone.utc)
+    assert records[-1].duration == 20_000_000  # equals two seconds
+    assert records[-1].application_name == "PowerPoint 2016"
+    assert (
+        records[-1].application_id == "{6D809377-6AF0-444B-8957-A3773F02200E}\\Microsoft Office\\Office16\\POWERPNT.EXE"
+    )
+    assert (
+        records[-1].activity_id == "ECB32AF3-1440-4086-94E3-5311F97F89C4\\{ThisPCDesktopFolder}\\This is test PPT.pptx"
+    )
+    assert records[-1].source == "\\sysvol\\ProgramData\\Microsoft\\Search\\Data\\Applications\\Windows\\Windows.edb"
+
+    # SearchIndexRecord (file)
+    assert records[1175].ts == datetime(2023, 2, 16, 14, 36, 23, 877518, tzinfo=timezone.utc)
+    assert records[1175].ts_mtime == datetime(2023, 2, 16, 14, 36, 14, 922361, tzinfo=timezone.utc)
+    assert records[1175].ts_btime == datetime(2023, 2, 16, 14, 35, 23, 656454, tzinfo=timezone.utc)
+    assert records[1175].ts_atime == datetime(2023, 2, 16, 14, 36, 22, 893101, tzinfo=timezone.utc)
+    assert records[1175].path == "C:\\Users\\testuser\\Desktop\\StrozFriedberg-Example.txt"
+    assert records[1175].type == "text/plain"
+    assert records[1175].size == 50
+    assert records[1175].data == "Example File from Stroz Friedberg.\r\nHappy Testing!"
+    assert records[1175].source == "\\sysvol\\ProgramData\\Microsoft\\Search\\Data\\Applications\\Windows\\Windows.edb"
+
+    # SearchIndexRecord (file)
+    assert records[1120].ts == datetime(2023, 2, 14, 15, 14, 40, 984222, tzinfo=timezone.utc)
+    assert records[1120].ts_mtime == datetime(2023, 2, 14, 15, 3, 59, 352678, tzinfo=timezone.utc)
+    assert records[1120].ts_btime == datetime(2023, 2, 14, 13, 56, 55, 827263, tzinfo=timezone.utc)
+    assert records[1120].ts_atime == datetime(2023, 2, 14, 15, 12, 4, 400179, tzinfo=timezone.utc)
+    assert records[1120].path == "C:\\Users\\testuser\\Desktop\\Content-Check\\Malicious.js"
+    assert records[1120].type == "JavaScript File"  # no mimetype available?
+    assert records[1120].size == 511
+    assert (
+        records[1120].data
+        == "Line 1\r\nLine 2\r\nLine 3\r\nLine 4\r\nLine 5\r\nLine 6\r\nLine 7\r\nLine 8\r\nLine 9\r\nLine 10\r\nLine 11\r\nLine 12\r\nLine 13\r\nLine 14\r\nLine 15\r\nLine 16\r\nLine 17\r\nLine 18\r\nLine 19\r\nLine 20\r\nLine 21\r\nLine 22\r\nLine 23\r\nLine 24\r\nLine 25\r\nLine 26\r\nLine 27\r\nLine 28\r\nLine 29\r\nLine 30\r\nLine 31\r\nLine 32\r\nLine 33\r\nLine 34\r\nLine 35\r\nLine 36\r\nLine 37\r\nLine 38\r\nLine 39\r\nLine 40\r\nLine 41\r\nLine 42\r\nLine 43\r\nLine 44\r\nLine 45\r\nLine 46\r\nLine 47\r\nLine 48\r\nLine 49\r\nLine 50\r\nLine 51\r\nLine 52\r\nLine 53\r\nLine 54\r\nLine 55\r\nLine 56\r\nLine 57\r\nLine \r\n"  # noqa: E501
+    )
+    assert records[1120].source == "\\sysvol\\ProgramData\\Microsoft\\Search\\Data\\Applications\\Windows\\Windows.edb"
+
+    # SearchIndexRecord (folder)
+    assert records[1116].ts == datetime(2023, 2, 14, 13, 59, 31, 356632, tzinfo=timezone.utc)
+    assert records[1116].ts_mtime == datetime(2023, 2, 14, 13, 57, 29, 517132, tzinfo=timezone.utc)
+    assert records[1116].ts_btime == datetime(2023, 2, 14, 13, 57, 8, 689777, tzinfo=timezone.utc)
+    assert records[1116].ts_atime == datetime(2023, 2, 14, 13, 59, 31, 340546, tzinfo=timezone.utc)
+    assert records[1116].path == "C:\\Users\\testuser\\Desktop\\Content-Check"
+    assert records[1116].type == "File folder"  # no mimetype available?
+    assert not records[1116].size
+    assert not records[1116].data
+    assert records[1116].source == "\\sysvol\\ProgramData\\Microsoft\\Search\\Data\\Applications\\Windows\\Windows.edb"
+
+    # BrowserHistoryRecord (iehistory)
+    assert records[995].ts == datetime(2023, 2, 13, 14, 17, 22, 448000, tzinfo=timezone.utc)
+    assert records[995].browser == "iehistory"
+    assert (
+        records[995].url
+        == "https://support.microsoft.com/en-us/microsoft-edge/this-website-doesn-t-work-in-internet-explorer-8f5fc675-cd47-414c-9535-12821ddfc554?ui=en-us&rs=en-us&ad=us"
+    )
+    assert records[995].host == "support.microsoft.com"
+    assert records[995].title == "This website doesn't work in Internet Explorer - Microsoft Support"
+    assert records[995].source == "\\sysvol\\ProgramData\\Microsoft\\Search\\Data\\Applications\\Windows\\Windows.edb"
+    assert records[995].user_id == user_sid
+
+
+def test_windows_search_sqlite(
+    target_win: Target, fs_win: VirtualFilesystem, hive_hklm: VirtualHive, hive_hku: VirtualHive
+) -> None:
+    """Test Windows 11 Search SQLite3 parsing.
+
+    Resources:
+        - https://github.com/strozfriedberg/sidr/tree/main/tests/testdata
+    """
+
+    user_sid = "S-1-5-21-4268361623-692440835-3372367631-1001"
+
+    add_win_user(
+        hive_hklm,
+        hive_hku,
+        target_win,
+        sid=user_sid,
+        home="C:\\Users\\User",
+    )
+
+    fs_win.map_file(
+        "ProgramData/Microsoft/Search/Data/Applications/Windows/Windows.db",
+        str(absolute_path("_data/plugins/os/windows/search/Windows.db")),
+    )
+
+    target_win.add_plugin(SearchIndexPlugin)
+    records = list(target_win.search())
+
+    len_records = len(records)
+    assert len_records == 839 - 1  # Database contains one empty record.
+
+    # SearchIndexActivityRecord
+    assert records[698].ts_start == datetime(2023, 1, 30, 22, 14, 18, tzinfo=timezone.utc)
+    assert records[698].ts_end == datetime(2023, 1, 30, 22, 14, 20, tzinfo=timezone.utc)
+    assert records[698].duration == 20_000_000
+    assert records[698].application_name == "notepad++.exe"
+    assert records[698].application_id == "{6D809377-6AF0-444B-8957-A3773F02200E}\\Notepad++\\notepad++.exe"
+    assert records[698].activity_id == "ECB32AF3-1440-4086-94E3-5311F97F89C4\\{Public}\\Threat\\becon.xml"
+    assert records[698].source == "\\sysvol\\ProgramData\\Microsoft\\Search\\Data\\Applications\\Windows\\Windows.db"
+
+    # SearchIndexRecord (file)
+    assert records[837].ts == datetime(2023, 1, 31, 2, 45, 2, 871614, tzinfo=timezone.utc)
+    assert records[837].ts_mtime == datetime(2023, 1, 31, 2, 45, 2, 56444, tzinfo=timezone.utc)
+    assert records[837].ts_btime == datetime(2023, 1, 31, 2, 26, 28, 898306, tzinfo=timezone.utc)
+    assert records[837].ts_atime == datetime(2023, 1, 31, 2, 45, 2, 56444, tzinfo=timezone.utc)
+    assert records[837].path == "C:\\Users\\Public\\malware\\New-beacon.xml"
+    assert records[837].type == "text/xml"
+    assert int(records[837].size) == 174
+    assert not records[837].data
+    assert records[837].source == "\\sysvol\\ProgramData\\Microsoft\\Search\\Data\\Applications\\Windows\\Windows.db"
+
+    # BrowserHistoryRecord (edge)
+    assert records[711].ts == datetime(2023, 1, 31, 0, 9, 47, 972897, tzinfo=timezone.utc)
+    assert records[711].browser == "winrt"
+    assert (
+        records[711].url
+        == "https://www.bing.com/search?q=install+chrome&cvid=2ce0f71581824fda82398075bb250924&aqs=edge.0.0j69i57j0l7.2774j0j7&FORM=ANNTA0&PC=U531"
+    )
+    assert records[711].host == "www.bing.com"
+    assert records[711].source == "\\sysvol\\ProgramData\\Microsoft\\Search\\Data\\Applications\\Windows\\Windows.db"
+    assert records[711].user_id == user_sid