|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import re |
| 4 | +import urllib.parse |
| 5 | +from typing import TYPE_CHECKING, Any, Union, get_args |
| 6 | + |
| 7 | +from dissect.esedb import EseDB |
| 8 | +from dissect.sql import SQLite3 |
| 9 | +from dissect.util.ts import wintimestamp |
| 10 | + |
| 11 | +from dissect.target.exceptions import FilesystemError, UnsupportedPluginError |
| 12 | +from dissect.target.helpers.record import TargetRecordDescriptor |
| 13 | +from dissect.target.plugin import Plugin, export |
| 14 | +from dissect.target.plugins.apps.browser.browser import BrowserHistoryRecord |
| 15 | +from dissect.target.plugins.apps.browser.edge import EdgePlugin |
| 16 | +from dissect.target.plugins.apps.browser.iexplore import InternetExplorerPlugin |
| 17 | + |
| 18 | +if TYPE_CHECKING: |
| 19 | + from collections.abc import Iterator |
| 20 | + from pathlib import Path |
| 21 | + |
| 22 | + from dissect.esedb.record import Record as EseDBRecord |
| 23 | + from dissect.esedb.table import Table as EseDBTable |
| 24 | + |
| 25 | + from dissect.target.plugins.general.users import UserDetails |
| 26 | + from dissect.target.target import Target |
| 27 | + |
# Record emitted for a regular (file-like) entry of the Windows Search Index property store.
SearchIndexRecord = TargetRecordDescriptor(
    "windows/search/index/entry",
    [
        # Timestamps: gather time of the index entry, followed by the modified/created/accessed times of the item.
        ("datetime", "ts"),
        ("datetime", "ts_mtime"),
        ("datetime", "ts_btime"),
        ("datetime", "ts_atime"),
        # Description of the indexed item itself.
        ("path", "path"),
        ("string", "type"),
        ("filesize", "size"),
        ("string", "data"),
        # Database file the entry was read from.
        ("path", "source"),
    ],
)
| 42 | + |
# Record emitted for index entries whose ``System_ItemType`` is ``ActivityHistoryItem``.
SearchIndexActivityRecord = TargetRecordDescriptor(
    "windows/search/index/activity",
    [
        # Start/end of the activity and the active duration as stored in the index.
        ("datetime", "ts_start"),
        ("datetime", "ts_end"),
        ("varint", "duration"),
        # Application the activity belongs to.
        ("string", "application_name"),
        ("string", "application_id"),
        ("string", "activity_id"),
        # Database file the entry was read from.
        ("path", "source"),
    ],
)
| 55 | + |
# Splits a ``System_ItemUrl`` value shaped like ``<browser>://{<sid>}/<url>`` into its three parts.
# The ``browser`` and ``sid`` groups are non-greedy on purpose: a greedy ``sid`` would run up to the *last*
# ``}/`` in the value, so a URL that itself contains ``}/`` would be truncated and the SID polluted.
RE_URL = re.compile(r"(?P<browser>.+?)\:\/\/\{(?P<sid>.+?)\}\/(?P<url>.+)$")
| 57 | + |
# Record type to emit per browser scheme (the part of ``System_ItemUrl`` captured by ``RE_URL`` as ``browser``).
BROWSER_RECORD_MAP = {
    "iehistory": InternetExplorerPlugin.BrowserHistoryRecord,
    "winrt": EdgePlugin.BrowserHistoryRecord,
}

# Every record type the plugin can yield; unpacked with ``get_args`` for the ``@export`` decorator.
SearchIndexRecords = Union[  # noqa: UP007
    SearchIndexRecord,
    SearchIndexActivityRecord,
    BrowserHistoryRecord,
]
| 64 | + |
| 65 | + |
class SearchIndexPlugin(Plugin):
    """Windows Search Index plugin.

    Locates Windows Search Index databases (EseDB ``*.edb`` and SQLite3 ``*.db``) in the system-wide and
    per-user locations listed in ``SYSTEM_PATHS`` and ``USER_PATHS`` and converts their property store
    entries into records.
    """

    SYSTEM_PATHS = (
        # Windows 11 22H2 (SQLite3)
        "sysvol/ProgramData/Microsoft/Search/Data/Applications/Windows/Windows.db",
        # Windows Vista and Windows 10 (EseDB)
        "sysvol/ProgramData/Microsoft/Search/Data/Applications/Windows/Windows.edb",
        # Windows XP (EseDB)
        "sysvol/Documents and Settings/All Users/Application Data/Microsoft/Search/Data/Applications/Windows/Windows.edb",  # noqa: E501
    )

    USER_PATHS = (
        # Windows 10 Server Roaming (EseDB / SQLite)
        "AppData/Roaming/Microsoft/Search/Data/Applications/S-1-*/*.*db",
    )

    def __init__(self, target: Target):
        super().__init__(target)
        # Collected eagerly so ``check_compatible`` can simply test for emptiness.
        self.databases = set(self.find_databases())

    @staticmethod
    def _already_seen(path: Path, seen: set[Path]) -> bool:
        """Return whether ``path`` refers to the same file as any path in ``seen``.

        A ``FilesystemError`` raised while comparing is treated as "not seen" (best effort), so the
        database is still processed.
        """
        try:
            return any(seen_file.samefile(path) for seen_file in seen)
        except FilesystemError:
            return False

    def find_databases(self) -> Iterator[tuple[Path, UserDetails | None]]:
        """Yield ``(resolved database path, user details)`` for every search index database found.

        System-wide databases are yielded with ``None`` as user details. Files that are the same file as
        an earlier hit (according to ``samefile``) are skipped.
        """
        seen: set[Path] = set()

        for system_path in self.SYSTEM_PATHS:
            path = self.target.fs.path(system_path)
            if not path.is_file() or self._already_seen(path, seen):
                continue

            seen.add(path)
            yield path.resolve(), None

        for user_details in self.target.user_details.all_with_home():
            for user_path in self.USER_PATHS:
                for path in user_details.home_path.glob(user_path):
                    if not path.is_file() or self._already_seen(path, seen):
                        continue

                    seen.add(path)
                    yield path.resolve(), user_details

    def check_compatible(self) -> None:
        """Raise ``UnsupportedPluginError`` when no search index database was found on the target."""
        if not self.databases:
            raise UnsupportedPluginError("No Windows Search Index database files found on target")

    @export(record=get_args(SearchIndexRecords))
    def search(self) -> Iterator[SearchIndexRecords]:
        """Yield Windows Search Index records.

        Parses ``Windows.edb`` EseDB and ``Windows.db`` SQLite3 databases. Currently does not parse
        ``GatherLogs/SystemIndex/SystemIndex.*.(Crwl|gthr)`` files or ``Windows-gather.db`` and ``Windows-usn.db`` files.

        Windows Search is a standard component of Windows 7 and Windows Vista, and is enabled by default. The standard (non-Windows Server)
        configuration of Windows Search indexes the following paths: ``C:\\Users\\*`` and ``C:\\ProgramData\\Microsoft\\Windows\\Start Menu\\Programs\\*``,
        with some exceptions for certain file extensions (see the linked references for more information).

        The difference between the fields ``System_Date*`` and ``System_Document_Date*`` should be researched further.
        It is unclear what the field ``InvertedOnlyMD5`` is a checksum of (record or file content?). It might be possible
        to correlate the field ``System_FileOwner`` with a ``UserRecordDescriptor``. The field ``System_FileAttributes`` should be
        investigated further.

        No test data available for indexed Outlook emails, this plugin might not be able to handle indexed email messages.

        References:
            - https://learn.microsoft.com/en-us/windows/win32/search/-search-3x-wds-overview
            - https://github.com/libyal/esedb-kb/blob/main/documentation/Windows%20Search.asciidoc
            - https://www.aon.com/en/insights/cyber-labs/windows-search-index-the-forensic-artifact-youve-been-searching-for
            - https://github.com/strozfriedberg/sidr
            - https://devblogs.microsoft.com/windows-search-platform/configuration-and-settings/
            - https://learn.microsoft.com/en-us/windows/win32/search/-search-3x-wds-included-in-index
        """  # noqa: E501

        for db_path, user_details in self.databases:
            # Compare case-insensitively: the resolved path may carry a differently cased extension.
            suffix = db_path.suffix.lower()

            if suffix == ".edb":
                yield from self.parse_esedb(db_path, user_details)

            elif suffix == ".db":
                yield from self.parse_sqlite(db_path, user_details)

            else:
                self.target.log.warning("Unknown Windows Search Index database file %r", db_path)

    def parse_esedb(self, path: Path, user_details: UserDetails | None) -> Iterator[SearchIndexRecords]:
        """Parse the EseDB ``SystemIndex_PropertyStore`` table.

        Every table record is wrapped in a ``TableRecord`` and handed to ``build_record``.
        """

        with path.open("rb") as fh:
            db = EseDB(fh)
            table = db.table("SystemIndex_PropertyStore")

            for record in table.records():
                yield from self.build_record(TableRecord(table, record), user_details, path)

    def parse_sqlite(self, path: Path, user_details: UserDetails | None) -> Iterator[SearchIndexRecords]:
        """Parse the SQLite3 ``SystemIndex_1_PropertyStore`` table.

        The table holds one row per ``(WorkId, ColumnId)`` pair. Consecutive rows sharing a ``WorkId`` are
        collected into one dictionary (keyed by the column name from the metadata table) and handed to
        ``build_record``. Rows with a falsy ``Value`` are not stored. Nothing is yielded when either table
        is missing or when the property store has no rows.
        """

        with path.open("rb") as fh:
            db = SQLite3(fh)

            if not (metadata := db.table("SystemIndex_1_PropertyStore_Metadata")):
                self.target.log.warning(
                    "Database %s does not have a table called 'SystemIndex_1_PropertyStore_Metadata'", path
                )
                return

            # ``ColumnId`` is translated using the ``SystemIndex_1_PropertyStore_Metadata`` table.
            # Only the part of ``UniqueKey`` after the first ``-`` is kept as the column name.
            columns = {
                row.get("Id"): (row.get("UniqueKey") or "").split("-", maxsplit=1)[-1] for row in metadata.rows()
            }

            if not (table := db.table("SystemIndex_1_PropertyStore")):
                self.target.log.warning("Database %s does not have a table called 'SystemIndex_1_PropertyStore'", path)
                return

            current_work_id = None
            values = {}

            for row in table.rows():
                work_id = row.get("WorkId")
                if current_work_id is None:
                    current_work_id = work_id

                # A new ``WorkId`` starts: flush what was collected for the previous one.
                if work_id != current_work_id:
                    yield from self.build_record(values, user_details, path)
                    current_work_id = work_id
                    values = {}

                if not (value := row.get("Value")):
                    continue

                column_id = row.get("ColumnId")
                if (column_name := columns.get(column_id)) is None:
                    # Without a metadata entry the value cannot be named, skip it instead of failing the whole parse.
                    self.target.log.debug("Unknown ColumnId %r for WorkId %r in %s", column_id, work_id, path)
                    continue

                values[column_name] = value

            # Flush the last item, but only if at least one row was seen (an empty table has nothing to report).
            if current_work_id is not None:
                yield from self.build_record(values, user_details, path)

    @staticmethod
    def _int_value(values: dict[str, Any] | TableRecord, key: str) -> int:
        """Return the value stored under ``key`` decoded as a little-endian integer, ``0`` if there is no value."""
        return int.from_bytes(values.get(key, b"") or b"", "little")

    def build_record(
        self, values: dict[str, Any] | TableRecord, user_details: UserDetails | None, db_path: Path
    ) -> Iterator[SearchIndexRecords]:
        """Build a ``SearchIndexRecord``, ``SearchIndexActivityRecord`` or browser history record.

        The record type is selected as follows:

        - ``System_ItemType`` equal to ``ActivityHistoryItem`` yields a ``SearchIndexActivityRecord``.
        - ``System_Search_Store`` equal to ``iehistory`` or ``winrt`` yields the browser history record type
          from ``BROWSER_RECORD_MAP``. Nothing is yielded (a warning is logged) when ``System_ItemUrl``
          cannot be parsed or names an unknown browser.
        - Anything else yields a ``SearchIndexRecord``.

        Timestamp properties without a value are passed to ``wintimestamp`` as ``0``.

        Args:
            values: Property values of a single index entry, addressed by unprefixed property name.
            user_details: Owner of the database the entry was read from, ``None`` for system-wide databases.
            db_path: Path of the database the entry was read from, stored as ``source`` on the record.
        """

        if values.get("System_ItemType") == "ActivityHistoryItem":
            yield SearchIndexActivityRecord(
                ts_start=wintimestamp(self._int_value(values, "System_ActivityHistory_StartTime")),
                ts_end=wintimestamp(self._int_value(values, "System_ActivityHistory_EndTime")),
                duration=self._int_value(values, "System_ActivityHistory_ActiveDuration"),
                application_name=values.get("System_Activity_AppDisplayName"),
                application_id=values.get("System_ActivityHistory_AppId"),
                activity_id=values.get("System_ActivityHistory_AppActivityId"),
                source=db_path,
                _target=self.target,
            )

        elif values.get("System_Search_Store") in ("iehistory", "winrt"):
            system_itemurl = values.get("System_ItemUrl")

            if not system_itemurl or not (match := RE_URL.match(system_itemurl)):
                self.target.log.warning(
                    "Unable to parse System_ItemUrl: %r (%r) in %s", system_itemurl, values, db_path
                )
                return

            # Address the groups by name instead of relying on the ordering of ``groupdict()``.
            browser, sid, url = match.group("browser", "sid", "url")

            if not (CurrentBrowserHistoryRecord := BROWSER_RECORD_MAP.get(browser)):
                self.target.log.warning(
                    "Unable to determine browser history type for %r (%r) in %s", browser, system_itemurl, db_path
                )
                return

            # Prefer the user identified by the SID in the URL, fall back to the owner of the database.
            user = None
            if sid and (sid_user_details := self.target.user_details.find(sid)):
                user = sid_user_details.user

            if not user and user_details:
                user = user_details.user

            url = values.get("System_Link_TargetUrl") or url
            host = None

            if url:
                try:
                    host = urllib.parse.urlparse(url).hostname
                except Exception as e:
                    # Best effort: a URL that cannot be parsed still results in a record, just without a host.
                    self.target.log.debug("Unable to determine hostname of URL %r in %s", url, db_path, exc_info=e)

            yield CurrentBrowserHistoryRecord(
                ts=wintimestamp(self._int_value(values, "System_Link_DateVisited")),
                browser=browser,
                url=url,
                title=values.get("System_Title"),
                host=host,
                source=db_path,
                _user=user,
                _target=self.target,
            )

        # System_Search_Store = "file"
        else:
            yield SearchIndexRecord(
                ts=wintimestamp(self._int_value(values, "System_Search_GatherTime")),
                ts_mtime=wintimestamp(self._int_value(values, "System_DateModified")),
                ts_btime=wintimestamp(self._int_value(values, "System_DateCreated")),
                ts_atime=wintimestamp(self._int_value(values, "System_DateAccessed")),
                path=values.get("System_ItemPathDisplay"),
                type=values.get("System_MIMEType")
                or values.get("System_ContentType")
                or values.get("System_ItemTypeText"),
                # Unlike the timestamps, a missing size is reported as ``None`` rather than ``0``.
                size=int.from_bytes(b_size, "little") if (b_size := values.get("System_Size")) else None,
                data=values.get("System_Search_AutoSummary"),
                source=db_path,
                _target=self.target,
            )
| 277 | + |
| 278 | + |
class TableRecord:
    """Wrapper that gives an EseDB record a ``dict.get``-like interface keyed by unprefixed column name.

    Args:
        table: Table the record belongs to; its ``column_names`` are used to build the name translation.
        record: The record whose values are looked up.
    """

    def __init__(self, table: EseDBTable, record: EseDBRecord):
        self.table = table
        self.record = record

        # Translates e.g. ``System_DateModified`` to ``15F-System_DateModified`` as these column name prefixes might
        # be dynamic based on the system version.
        self.columns = {col.split("-", maxsplit=1)[-1]: col for col in table.column_names}

    def get(self, key: str, default: Any | None = None) -> Any:
        """Return the value of the column whose unprefixed name is ``key``.

        ``default`` is returned when the table has no such column or when the stored value is ``None``,
        so callers can rely on the type of the default they pass in.
        """
        column = self.columns.get(key)
        if column is None:
            # ``default`` is a fallback *value*; it must never be passed on as a column name.
            return default

        value = self.record.get(column)
        return default if value is None else value
0 commit comments