Skip to content

Commit 7560eee

Browse files
JSCU-CNI and Schamper authored
Add Windows Search Index plugin (#1254)
Co-authored-by: Erik Schamper <[email protected]>
1 parent 0e1c75f commit 7560eee

File tree

6 files changed

+474
-3
lines changed

6 files changed

+474
-3
lines changed

dissect/target/plugins/os/windows/generic.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -578,8 +578,9 @@ def machine_sid(self) -> Iterator[ComputerSidRecord]:
578578
sid=f"S-1-5-21-{sid[0]}-{sid[1]}-{sid[2]}",
579579
_target=self.target,
580580
)
581-
except (RegistryError, struct.error):
582-
self.target.log.exception("Cannot read machine SID from registry")
581+
except (RegistryError, struct.error) as e:
582+
self.target.log.error("Cannot read machine SID from registry") # noqa: TRY400
583+
self.target.log.debug("", exc_info=e)
583584
return None
584585

585586
@export(record=ComputerSidRecord)
Lines changed: 289 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,289 @@
1+
from __future__ import annotations
2+
3+
import re
4+
import urllib.parse
5+
from typing import TYPE_CHECKING, Any, Union, get_args
6+
7+
from dissect.esedb import EseDB
8+
from dissect.sql import SQLite3
9+
from dissect.util.ts import wintimestamp
10+
11+
from dissect.target.exceptions import FilesystemError, UnsupportedPluginError
12+
from dissect.target.helpers.record import TargetRecordDescriptor
13+
from dissect.target.plugin import Plugin, export
14+
from dissect.target.plugins.apps.browser.browser import BrowserHistoryRecord
15+
from dissect.target.plugins.apps.browser.edge import EdgePlugin
16+
from dissect.target.plugins.apps.browser.iexplore import InternetExplorerPlugin
17+
18+
if TYPE_CHECKING:
19+
from collections.abc import Iterator
20+
from pathlib import Path
21+
22+
from dissect.esedb.record import Record as EseDBRecord
23+
from dissect.esedb.table import Table as EseDBTable
24+
25+
from dissect.target.plugins.general.users import UserDetails
26+
from dissect.target.target import Target
27+
28+
# Record emitted for every indexed item that is neither an activity history item
# nor a browser history item (see ``SearchIndexPlugin.build_record``).
SearchIndexRecord = TargetRecordDescriptor(
    "windows/search/index/entry",
    [
        # Timestamps
        ("datetime", "ts"),
        ("datetime", "ts_mtime"),
        ("datetime", "ts_btime"),
        ("datetime", "ts_atime"),
        # Item description
        ("path", "path"),
        ("string", "type"),
        ("filesize", "size"),
        ("string", "data"),
        # Database file the entry was read from
        ("path", "source"),
    ],
)
42+
43+
# Record emitted for items whose ``System_ItemType`` is ``ActivityHistoryItem``
# (see ``SearchIndexPlugin.build_record``).
SearchIndexActivityRecord = TargetRecordDescriptor(
    "windows/search/index/activity",
    [
        # Activity time span
        ("datetime", "ts_start"),
        ("datetime", "ts_end"),
        ("varint", "duration"),
        # Application and activity identifiers
        ("string", "application_name"),
        ("string", "application_id"),
        ("string", "activity_id"),
        # Database file the entry was read from
        ("path", "source"),
    ],
)
55+
56+
# Matches ``System_ItemUrl`` values of the form ``<store>://{<sid>}/<url>`` and captures the three parts
# as the named groups ``browser``, ``sid`` and ``url``.
RE_URL = re.compile(r"(?P<browser>.+)\:\/\/\{(?P<sid>.+)\}\/(?P<url>.+)$")
57+
58+
# Maps the ``browser`` part of a ``System_ItemUrl`` to the browser history record type
# that is emitted for it.
BROWSER_RECORD_MAP = {
    "iehistory": InternetExplorerPlugin.BrowserHistoryRecord,
    "winrt": EdgePlugin.BrowserHistoryRecord,
}
62+
63+
# All record types the plugin can yield. The members of this union are unpacked with ``get_args()``
# to declare the record types of the exported ``search`` function.
SearchIndexRecords = Union[SearchIndexRecord, SearchIndexActivityRecord, BrowserHistoryRecord]  # noqa: UP007
64+
65+
66+
class SearchIndexPlugin(Plugin):
    """Windows Search Index plugin."""

    SYSTEM_PATHS = (
        # Windows 11 22H2 (SQLite3)
        "sysvol/ProgramData/Microsoft/Search/Data/Applications/Windows/Windows.db",
        # Windows Vista and Windows 10 (EseDB)
        "sysvol/ProgramData/Microsoft/Search/Data/Applications/Windows/Windows.edb",
        # Windows XP (EseDB)
        "sysvol/Documents and Settings/All Users/Application Data/Microsoft/Search/Data/Applications/Windows/Windows.edb",  # noqa: E501
    )

    USER_PATHS = (
        # Windows 10 Server Roaming (EseDB / SQLite)
        "AppData/Roaming/Microsoft/Search/Data/Applications/S-1-*/*.*db",
    )

    def __init__(self, target: Target):
        super().__init__(target)
        # Set of ``(database path, owning user or None)`` tuples found on the target.
        self.databases = set(self.find_databases())

    @staticmethod
    def _already_seen(path: Path, seen: set[Path]) -> bool:
        """Return whether ``path`` refers to the same file as any path in ``seen``."""
        try:
            return any(seen_file.samefile(path) for seen_file in seen)
        except FilesystemError:
            # Best effort: if the comparison itself fails, treat the path as not seen before.
            return False

    @staticmethod
    def _int_le(values: dict[str, Any] | TableRecord, key: str) -> int:
        """Return the little-endian integer stored under ``key``, or ``0`` if the value is missing or empty."""
        # ``or b""`` also covers values that are present but ``None``.
        return int.from_bytes(values.get(key) or b"", "little")

    @staticmethod
    def _filetime(values: dict[str, Any] | TableRecord, key: str) -> Any:
        """Return the value stored under ``key`` converted with ``wintimestamp``.

        A missing or empty value is converted as timestamp ``0``.
        """
        return wintimestamp(SearchIndexPlugin._int_le(values, key))

    def find_databases(self) -> Iterator[tuple[Path, UserDetails | None]]:
        """Yield ``(path, user_details)`` for every Windows Search Index database on the target.

        System-wide locations (``SYSTEM_PATHS``) are yielded with ``None`` as user details, per-user
        locations (``USER_PATHS``, relative to each user's home directory) with the details of that user.
        A file that is the same file as one that was already found is only yielded once.
        """
        seen = set()

        for system_path in self.SYSTEM_PATHS:
            path = self.target.fs.path(system_path)
            if not path.is_file() or self._already_seen(path, seen):
                continue

            seen.add(path)
            yield path.resolve(), None

        for user_details in self.target.user_details.all_with_home():
            for user_path in self.USER_PATHS:
                for path in user_details.home_path.glob(user_path):
                    if not path.is_file() or self._already_seen(path, seen):
                        continue

                    seen.add(path)
                    yield path.resolve(), user_details

    def check_compatible(self) -> None:
        if not self.databases:
            raise UnsupportedPluginError("No Windows Search Index database files found on target")

    @export(record=get_args(SearchIndexRecords))
    def search(self) -> Iterator[SearchIndexRecords]:
        """Yield Windows Search Index records.

        Parses ``Windows.edb`` EseDB and ``Windows.db`` SQLite3 databases. Currently does not parse
        ``GatherLogs/SystemIndex/SystemIndex.*.(Crwl|gthr)`` files or ``Windows-gather.db`` and ``Windows-usn.db`` files.

        Windows Search is a standard component of Windows 7 and Windows Vista, and is enabled by default. The standard (non-Windows Server)
        configuration of Windows Search indexes the following paths: ``C:\\Users\\*`` and ``C:\\ProgramData\\Microsoft\\Windows\\Start Menu\\Programs\\*``,
        with some exceptions for certain file extensions (see the linked references for more information).

        The difference between the fields ``System_Date*`` and ``System_Document_Date*`` should be researched further.
        It is unclear what the field ``InvertedOnlyMD5`` is a checksum of (record or file content?). It might be possible
        to correlate the field ``System_FileOwner`` with a ``UserRecordDescriptor``. The field ``System_FileAttributes`` should be
        investigated further.

        No test data available for indexed Outlook emails, this plugin might not be able to handle indexed email messages.

        References:
            - https://learn.microsoft.com/en-us/windows/win32/search/-search-3x-wds-overview
            - https://github.com/libyal/esedb-kb/blob/main/documentation/Windows%20Search.asciidoc
            - https://www.aon.com/en/insights/cyber-labs/windows-search-index-the-forensic-artifact-youve-been-searching-for
            - https://github.com/strozfriedberg/sidr
            - https://devblogs.microsoft.com/windows-search-platform/configuration-and-settings/
            - https://learn.microsoft.com/en-us/windows/win32/search/-search-3x-wds-included-in-index
        """  # noqa: E501

        for db_path, user_details in self.databases:
            # Compare case-insensitively so that e.g. ``Windows.EDB`` is still recognised.
            suffix = db_path.suffix.lower()

            if suffix == ".edb":
                yield from self.parse_esedb(db_path, user_details)

            elif suffix == ".db":
                yield from self.parse_sqlite(db_path, user_details)

            else:
                self.target.log.warning("Unknown Windows Search Index database file %r", db_path)

    def parse_esedb(self, path: Path, user_details: UserDetails | None) -> Iterator[SearchIndexRecords]:
        """Parse the EseDB ``SystemIndex_PropertyStore`` table.

        Every record of the table is wrapped in a ``TableRecord`` and handed to :meth:`build_record`.
        """

        with path.open("rb") as fh:
            db = EseDB(fh)
            table = db.table("SystemIndex_PropertyStore")

            for record in table.records():
                yield from self.build_record(TableRecord(table, record), user_details, path)

    def parse_sqlite(self, path: Path, user_details: UserDetails | None) -> Iterator[SearchIndexRecords]:
        """Parse the SQLite3 ``SystemIndex_1_PropertyStore`` table.

        The table stores one row per ``(WorkId, ColumnId, Value)``. Consecutive rows sharing a ``WorkId``
        are collected into a single ``{column name: value}`` mapping, which is handed to :meth:`build_record`.
        Rows without a (truthy) value are ignored and no record is built for a ``WorkId`` without any values.
        """

        with path.open("rb") as fh:
            db = SQLite3(fh)

            if not (metadata := db.table("SystemIndex_1_PropertyStore_Metadata")):
                self.target.log.warning(
                    "Database %s does not have a table called 'SystemIndex_1_PropertyStore_Metadata'", path
                )
                return

            if not (table := db.table("SystemIndex_1_PropertyStore")):
                self.target.log.warning("Database %s does not have a table called 'SystemIndex_1_PropertyStore'", path)
                return

            # ``ColumnId`` is translated using the ``SystemIndex_1_PropertyStore_Metadata`` table.
            # The part of ``UniqueKey`` before the first ``-`` is stripped, leaving the property name.
            columns = {row.get("Id"): (row.get("UniqueKey") or "").split("-", maxsplit=1)[-1] for row in metadata.rows()}

            current_work_id = None
            values = {}

            for row in table.rows():
                work_id = row.get("WorkId")
                if current_work_id is None:
                    current_work_id = work_id

                if work_id != current_work_id:
                    # A new item starts here, emit the item collected so far.
                    if values:
                        yield from self.build_record(values, user_details, path)
                    current_work_id = work_id
                    values = {}

                if value := row.get("Value"):
                    column_id = row.get("ColumnId")
                    if (column_name := columns.get(column_id)) is None:
                        # Don't abort the whole database on a property that is missing from the metadata table.
                        self.target.log.debug("Unknown ColumnId %r in %s, skipping value", column_id, path)
                        continue
                    values[column_name] = value

            # Emit the last collected item, if there is one.
            if values:
                yield from self.build_record(values, user_details, path)

    def build_record(
        self, values: dict[str, Any] | TableRecord, user_details: UserDetails | None, db_path: Path
    ) -> Iterator[SearchIndexRecords]:
        """Build a ``SearchIndexRecord``, ``SearchIndexActivityRecord`` or ``BrowserHistoryRecord``.

        Args:
            values: Mapping-like object (anything with a ``get(key, default)`` method) holding the
                properties of a single indexed item.
            user_details: Owner of the database the item was read from, if any. Used as fallback user
                for browser history records.
            db_path: Path of the database the item was read from, stored as ``source`` of the record.

        Yields:
            At most one record. Nothing is yielded for browser history items whose ``System_ItemUrl``
            can not be parsed or whose browser type is unknown; a warning is logged instead.
        """

        if values.get("System_ItemType") == "ActivityHistoryItem":
            yield SearchIndexActivityRecord(
                ts_start=self._filetime(values, "System_ActivityHistory_StartTime"),
                ts_end=self._filetime(values, "System_ActivityHistory_EndTime"),
                duration=self._int_le(values, "System_ActivityHistory_ActiveDuration"),
                application_name=values.get("System_Activity_AppDisplayName"),
                application_id=values.get("System_ActivityHistory_AppId"),
                activity_id=values.get("System_ActivityHistory_AppActivityId"),
                source=db_path,
                _target=self.target,
            )

        elif values.get("System_Search_Store") in ("iehistory", "winrt"):
            system_itemurl = values.get("System_ItemUrl")

            if not system_itemurl or not (match := RE_URL.match(system_itemurl)):
                self.target.log.warning(
                    "Unable to parse System_ItemUrl: %r (%r) in %s", system_itemurl, values, db_path
                )
                return

            browser, sid, url = match.group("browser", "sid", "url")

            if not (CurrentBrowserHistoryRecord := BROWSER_RECORD_MAP.get(browser)):
                self.target.log.warning(
                    "Unable to determine browser history type for %r (%r) in %s", browser, system_itemurl, db_path
                )
                return

            # Prefer the user the SID in the item URL belongs to, fall back to the owner of the database.
            user = None
            if sid and (sid_user_details := self.target.user_details.find(sid)):
                user = sid_user_details.user

            if not user and user_details:
                user = user_details.user

            url = values.get("System_Link_TargetUrl") or url
            host = None

            if url:
                try:
                    host = urllib.parse.urlparse(url).hostname
                except Exception as e:
                    # Best effort: a URL that can not be parsed only costs us the ``host`` field.
                    self.target.log.debug("Unable to parse hostname from URL %r", url)
                    self.target.log.debug("", exc_info=e)

            yield CurrentBrowserHistoryRecord(
                ts=self._filetime(values, "System_Link_DateVisited"),
                browser=browser,
                url=url,
                title=values.get("System_Title"),
                host=host,
                source=db_path,
                _user=user,
                _target=self.target,
            )

        # System_Search_Store = "file"
        else:
            yield SearchIndexRecord(
                ts=self._filetime(values, "System_Search_GatherTime"),
                ts_mtime=self._filetime(values, "System_DateModified"),
                ts_btime=self._filetime(values, "System_DateCreated"),
                ts_atime=self._filetime(values, "System_DateAccessed"),
                path=values.get("System_ItemPathDisplay"),
                type=values.get("System_MIMEType")
                or values.get("System_ContentType")
                or values.get("System_ItemTypeText"),
                size=int.from_bytes(b_size, "little") if (b_size := values.get("System_Size")) else None,
                data=values.get("System_Search_AutoSummary"),
                source=db_path,
                _target=self.target,
            )
277+
278+
279+
class TableRecord:
    """Read-only, dict-like view on a single EseDB record.

    Allows a record to be queried by the property name without its column prefix through :meth:`get`,
    so EseDB records and plain dictionaries can be handled by the same code.
    """

    def __init__(self, table: EseDBTable, record: EseDBRecord):
        self.table = table
        self.record = record

        # Translates e.g. ``System_DateModified`` to ``15F-System_DateModified`` as these column name prefixes might
        # be dynamic based on the system version.
        self.columns = {col.split("-", maxsplit=1)[-1]: col for col in table.column_names}

    def get(self, key: str, default: Any | None = None) -> Any:
        """Return the value of the column for ``key``.

        Args:
            key: Property name without the column prefix, e.g. ``System_DateModified``.
            default: Value to return if the table has no column for ``key`` or the record
                has no value (``None``) for it.
        """
        # ``default`` is the fallback *value*, it must never be used as a column name.
        if (column := self.columns.get(key)) is None:
            return default

        value = self.record.get(column)
        return default if value is None else value

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ full = [
5353
"dissect.cim>=3,<4",
5454
"dissect.clfs>=1,<2",
5555
"dissect.cramfs>=1,<2",
56-
"dissect.esedb>=3,<4",
56+
"dissect.esedb>=3.17.dev1,<4", # TODO: update on release!
5757
"dissect.etl>=3,<4",
5858
"dissect.extfs>=3,<4",
5959
"dissect.fat>=3,<4",
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:e655a1af9eb3386ffdc7e19aa8dcda06dfa2c35a1c3b657ac4a9c1c13c83f020
3+
size 2957312
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:10dd5fc05c2d19aa1fa4a705142e413fc5a4af17ae8e5e4909164262f9de7c66
3+
size 33554432

0 commit comments

Comments
 (0)