Skip to content
This repository was archived by the owner on Nov 20, 2025. It is now read-only.

Commit e11d651

Browse files
authored
Improve support searching indexes (#36)
1 parent 3f721c4 commit e11d651

File tree

11 files changed

+583
-147
lines changed

11 files changed

+583
-147
lines changed

dissect/esedb/btree.py

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING
4+
5+
from dissect.esedb.exceptions import KeyNotFoundError, NoNeighbourPageError
6+
7+
if TYPE_CHECKING:
8+
from dissect.esedb.esedb import EseDB
9+
from dissect.esedb.page import Node, Page
10+
11+
12+
class BTree:
    """A simple implementation for searching the ESE B+Trees.

    This is a stateful interactive class that moves an internal cursor to a position within the BTree.
    The cursor consists of the current page, the number of that page and the index of the current node
    within that page.

    Args:
        esedb: An instance of :class:`~dissect.esedb.esedb.EseDB`.
        root: The root page to open the :class:`BTree` on, either as a page number or as an
            already loaded :class:`~dissect.esedb.page.Page`.
    """

    def __init__(self, esedb: EseDB, root: int | Page):
        self.esedb = esedb

        # Accept either a page number or a page object, but always keep track of both
        if isinstance(root, int):
            page_num = root
            root = esedb.page(page_num)
        else:
            page_num = root.num

        self.root = root

        # Cursor state: current page, its page number and the node index within that page
        self._page = root
        self._page_num = page_num
        self._node_num = 0

    def reset(self) -> None:
        """Reset the internal state to the root of the BTree."""
        self._page = self.root
        self._page_num = self._page.num
        self._node_num = 0

    def node(self) -> Node:
        """Return the node the BTree is currently on.

        Returns:
            A :class:`~dissect.esedb.page.Node` object of the current node.
        """
        return self._page.node(self._node_num)

    def next(self) -> Node:
        """Move the BTree to the next node and return it.

        Can move the BTree to the next page as a side effect.

        Returns:
            A :class:`~dissect.esedb.page.Node` object of the next node.

        Raises:
            NoNeighbourPageError: If the cursor is on the last node of a page that has no next page.
        """
        if self._node_num + 1 >= self._page.node_count:
            # Already on the last node of this page, continue on the first node of the next page
            self.next_page()
        else:
            self._node_num += 1

        return self.node()

    def next_page(self) -> None:
        """Move the BTree to the first node of the next page in the tree.

        Raises:
            NoNeighbourPageError: If the current page has no next page.
        """
        if not self._page.next_page:
            raise NoNeighbourPageError(f"{self._page} has no next page")

        self._page = self.esedb.page(self._page.next_page)
        # Keep the tracked page number in sync with the page we just moved to
        self._page_num = self._page.num
        self._node_num = 0

    def prev(self) -> Node:
        """Move the BTree to the previous node and return it.

        Can move the BTree to the previous page as a side effect.

        Returns:
            A :class:`~dissect.esedb.page.Node` object of the previous node.

        Raises:
            NoNeighbourPageError: If the cursor is on the first node of a page that has no previous page.
        """
        if self._node_num <= 0:
            # Already on the first node of this page, continue on the last node of the previous page
            self.prev_page()
        else:
            self._node_num -= 1

        return self.node()

    def prev_page(self) -> None:
        """Move the BTree to the last node of the previous page in the tree.

        Raises:
            NoNeighbourPageError: If the current page has no previous page.
        """
        if not self._page.previous_page:
            raise NoNeighbourPageError(f"{self._page} has no previous page")

        self._page = self.esedb.page(self._page.previous_page)
        # Keep the tracked page number in sync with the page we just moved to
        self._page_num = self._page.num
        self._node_num = self._page.node_count - 1

    def search(self, key: bytes, exact: bool = True) -> Node:
        """Search the tree for the given ``key``.

        The search descends from the page the BTree is *currently* on, so call :meth:`reset` first
        to search from the root.

        Moves the BTree to the leaf node selected by :func:`find_node`: the matching node, or otherwise
        the node the binary search ended on (assuming nodes are ordered by key, that is the first node
        whose key is not smaller than ``key``, or the last node of the page if all keys are smaller).
        The cursor is moved even if a :class:`KeyNotFoundError` is raised afterwards.

        Args:
            key: The key to search for.
            exact: Whether to only return successfully on an exact match.

        Returns:
            A :class:`~dissect.esedb.page.Node` object of the node the BTree ended up on.

        Raises:
            KeyNotFoundError: If an ``exact`` match was requested but not found.
        """
        page = self._page
        node = find_node(page, key)

        # Follow the child pointers of branch pages until we arrive on a leaf page
        while page.is_branch:
            page = self.esedb.page(node.child)
            node = find_node(page, key)

        self._page = page
        self._page_num = page.num
        self._node_num = node.num

        if exact and key != node.key:
            # !r renders the same text as before, without a BytesWarning when running with ``python -b``
            raise KeyNotFoundError(f"Can't find key: {key!r}")

        return self.node()
133+
134+
135+
def find_node(page: Page, key: bytes) -> Node:
    """Binary search a single page for the node belonging to ``key``.

    On an exact match the matching node is returned for leaf pages. For branch pages the node
    *after* the match is returned instead (clamped to the last node of the page).

    Without an exact match, the node on which the binary search converged is returned.

    Args:
        page: The page to search.
        key: The key to search.

    Returns:
        The :class:`~dissect.esedb.page.Node` selected for ``key`` on this page.
    """
    low = 0
    high = page.node_count - 1

    while low < high:
        middle = (low + high) // 2
        candidate = page.node(middle)

        # It turns out that the way BTree keys are compared matches 1:1 with how Python compares bytes
        # First compare data, then length
        if key < candidate.key:
            high = middle
            continue

        if key != candidate.key:
            # The key is larger than this node, so it must live further to the right
            low = middle + 1
            continue

        if not page.is_branch:
            return candidate

        # If there's an exact match on a key on a branch page, the actual leaf nodes are in the next branch
        # Page keys for branch pages appear to be non-inclusive upper bounds
        return page.node(min(middle + 1, page.node_count - 1))

    # The search range collapsed to a single node
    return page.node(low)

dissect/esedb/c_esedb.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,34 @@
426426
DotNetGuid = 0x00040000, // index over GUID column according to .Net GUID sort order
427427
ImmutableStructure = 0x00080000, // Do not write to the input structures during a JetCreateIndexN call.
428428
};
429+
430+
flag IDBFLAG : uint16 {
431+
Unique = 0x0001, // Duplicate keys not allowed
432+
AllowAllNulls = 0x0002, // Make entries for NULL keys (all segments are null)
433+
AllowFirstNull = 0x0004, // First index column NULL allowed in index
434+
AllowSomeNulls = 0x0008, // Make entries for keys with some null segments
435+
NoNullSeg = 0x0010, // Don't allow a NULL key segment
436+
Primary = 0x0020, // Index is the primary index
437+
LocaleSet = 0x0040, // Index locale information (locale name) is set (JET_bitIndexUnicode was specified).
438+
Multivalued = 0x0080, // Has a multivalued segment
439+
TemplateIndex = 0x0100, // Index of a template table
440+
DerivedIndex = 0x0200, // Index derived from template table
441+
// Note that this flag is persisted, but
442+
// never used in an in-memory IDB, because
443+
// we use the template index IDB instead.
444+
LocalizedText = 0x0400, // Has a unicode text column? (code page is 1200)
445+
SortNullsHigh = 0x0800, // NULL sorts after data
446+
// Jan 2012: MSU is being removed. fidbUnicodeFixupOn should no longer be referenced.
447+
UnicodeFixupOn_Deprecated = 0x1000, // Track entries with undefined Unicode codepoints
448+
CrossProduct = 0x2000, // all combinations of multi-valued columns are indexed
449+
DisallowTruncation = 0x4000, // fail update rather than allow key truncation
450+
NestedTable = 0x8000, // combinations of multi-valued columns of same itagSequence are indexed
451+
};
452+
453+
flag IDXFLAG : uint16 {
454+
ExtendedColumns = 0x0001, // IDXSEGs are comprised of JET_COLUMNIDs, not FIDs
455+
DotNetGuid = 0x0002, // GUIDs sort according to .Net rules
456+
};
429457
""" # noqa E501
430458

431459
c_esedb = cstruct().load(esedb_def)
@@ -444,6 +472,8 @@
444472
TAGFLD_HEADER = c_esedb.TAGFLD_HEADER
445473
CODEPAGE = c_esedb.CODEPAGE
446474
COMPRESSION_SCHEME = c_esedb.COMPRESSION_SCHEME
475+
IDBFLAG = c_esedb.IDBFLAG
476+
IDXFLAG = c_esedb.IDXFLAG
447477

448478
CODEPAGE_MAP = {
449479
CODEPAGE.UNICODE: "utf-16-le",

0 commit comments

Comments
 (0)