Skip to content
This repository was archived by the owner on Nov 20, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions dissect/esedb/c_esedb.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@ def decode_bit(buf: bytes) -> bool:
return c_esedb.uint8(buf) == 0xFF


def decode_text(buf: bytes, encoding: CODEPAGE) -> str:
def decode_text(buf: bytes, encoding: CODEPAGE, errors: str | None = "backslashreplace") -> str:
"""Decode text with the appropriate encoding.

Args:
Expand All @@ -504,7 +504,7 @@ def decode_text(buf: bytes, encoding: CODEPAGE) -> str:
if encoding == CODEPAGE.UNICODE and len(buf) % 2:
buf += b"\x00"

return buf.decode(CODEPAGE_MAP[encoding]).rstrip("\x00")
return buf.decode(CODEPAGE_MAP[encoding], errors=errors).rstrip("\x00")


def decode_guid(buf: bytes) -> str:
Expand Down
15 changes: 9 additions & 6 deletions dissect/esedb/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def __init__(self, table: Table, node: Node):
self._get_tag_field = lru_cache(4096)(self._get_tag_field)
self._find_tag_field_idx = lru_cache(4096)(self._find_tag_field_idx)

def get(self, column: Column, raw: bool = False) -> RecordValue:
def get(self, column: Column, raw: bool = False, errors: str | None = "backslashreplace") -> RecordValue:
"""Retrieve the value for the specified column.

Optionally receive the raw data as it's stored in the record.
Expand All @@ -206,6 +206,7 @@ def get(self, column: Column, raw: bool = False) -> RecordValue:
Args:
column: The column to retrieve the value of.
raw: Whether to return the raw data stored in the record instead of the parsed value.
errors: Error handling scheme to use when decoding bytes to text (default: 'backslashreplace').
"""
value = None
tag_field = None
Expand All @@ -228,11 +229,11 @@ def get(self, column: Column, raw: bool = False) -> RecordValue:
return value

if value is not None:
return self._parse_value(column, value, tag_field)
return self._parse_value(column, value, tag_field, errors)

return None

def as_dict(self, raw: bool = False) -> dict[str, RecordValue]:
def as_dict(self, raw: bool = False, errors: str | None = "backslashreplace") -> dict[str, RecordValue]:
"""Serialize the record as a dictionary."""
obj = {}

Expand All @@ -251,21 +252,23 @@ def _iter_column_id() -> Iterator[Column]:
column = self.table._column_id_map[column_id]

try:
obj[column.name] = self.get(column, raw)
obj[column.name] = self.get(column, raw, errors)
except Exception as e:
obj[column.name] = f"!ERROR! {e}"

return obj

def _parse_value(self, column: Column, value: bytes, tag_field: TagField = None) -> RecordValue:
def _parse_value(
self, column: Column, value: bytes, tag_field: TagField = None, errors: str | None = "backslashreplace"
) -> RecordValue:
"""Parse the raw value into the appropriate type.

For tagged columns, also interpret things like multi-values, separated and compressed data.
"""
ctype = column.ctype
parse_func = ctype.parse
if column.is_text:
parse_func = functools.partial(ctype.parse, encoding=column.encoding)
parse_func = functools.partial(ctype.parse, encoding=column.encoding, errors=errors)

if self.esedb.impacket_compat:
if tag_field and tag_field.flags & TAGFLD_HEADER.Compressed:
Expand Down
Binary file added tests/_data/Windows.edb.gz
Binary file not shown.
5 changes: 5 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,8 @@ def sru_db() -> Iterator[BinaryIO]:
@pytest.fixture
def ual_db() -> Iterator[BinaryIO]:
    """Fixture yielding whatever ``open_file_gz`` produces for ``_data/Current.mdb.gz``."""
    archive = "_data/Current.mdb.gz"
    yield from open_file_gz(archive)


@pytest.fixture
def windows_search_db() -> Iterator[BinaryIO]:
    """Fixture yielding whatever ``open_file_gz`` produces for ``_data/Windows.edb.gz``."""
    archive = "_data/Windows.edb.gz"
    yield from open_file_gz(archive)
16 changes: 16 additions & 0 deletions tests/test_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,19 @@ def test_comparison(basic_db: BinaryIO) -> None:

assert set(records) == {records[0], records[1]}
assert set(records) | {obj} == {records[0], records[1]}


def test_parse_value_encoding(windows_search_db: BinaryIO) -> None:
    """Check that a (Long)Text column holding invalid utf-16-le can still be parsed.

    The value looked up for ``WorkID=1017`` must begin with the expected
    readable text and end with the literal backslash-escaped form of the
    trailing bytes.

    Resources:
        - https://github.com/fox-it/dissect.esedb/pull/48
    """
    expected_prefix = "Hong Kong SCS AdobeMingStd-Light-Acro-HKscs-B5-H ASCII"
    expected_suffix = "\\x4c\\xd8"

    property_store = EseDB(windows_search_db).table("SystemIndex_PropertyStore")
    summary = property_store.search(WorkID=1017).get("4625-System_Search_AutoSummary")

    assert summary.startswith(expected_prefix)
    assert summary.endswith(expected_suffix)