Skip to content

Commit 1013771

Browse files
authored
Feature/fast iteration (#5)
* Add lookupGE, ForwardCursor, and Seq leaf-safe optimization

  lookupGE: O(log n) ceiling lookup with zero allocations. Returns the first stored element >= key, or nil.

  ForwardCursor: mutable forward-only cursor for efficient sequential ceiling lookups. Amortized O(1) per seekGE when keys arrive in ascending order. Supports next() for O(1) sequential advance and current() for non-advancing read.

  Seq leafSafe: skips per-element comparator bounds check when the entire current leaf is within the iteration range.

  Also adds searchFirstFrom to ANode (binary search with lower bound hint) and CLJS lookup-ge support.

* Fix CLJS type inference warnings in branch and btset

  Add ^Branch type hints to merge/merge-split params, ^BTSet to helper functions, and ^js/^BTSet to seek's seq field access to eliminate "Cannot infer target type" warnings during CLJS compilation.

* Remove lookupGE/ForwardCursor from public Clojure API

  Keep Java methods for internal use by datahike but do not expose them in the Clojure namespace. Mark lookupGE, ForwardCursor, and forwardCursor() as internal API subject to change in Javadoc. Tests updated to use interop directly via a local helper.
1 parent bc39d31 commit 1013771

File tree

6 files changed

+351
-10
lines changed

6 files changed

+351
-10
lines changed

src-clojure/org/replikativ/persistent_sorted_set/branch.cljs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,7 @@
467467
acc))]
468468
(set! _measure result)
469469
result)))))
470-
(merge [this next]
470+
(merge [this ^Branch next]
471471
(let [sc1 subtree-count
472472
sc2 (.-subtree-count next)
473473
new-sc (if (and (>= sc1 0) (>= sc2 0)) (+ sc1 sc2) -1)
@@ -489,7 +489,7 @@
489489
new-sc
490490
new-measure
491491
settings)))
492-
(merge-split [this next]
492+
(merge-split [this ^Branch next]
493493
(let [;; Ensure children arrays exist
494494
c1 (ensure-children this)
495495
c2 (ensure-children next)

src-clojure/org/replikativ/persistent_sorted_set/btset.cljs

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,29 @@
216216
cmp (or cmp (.-comparator set))]
217217
(await (node/lookup root (.-storage set) key cmp opts))))))
218218

219+
(defn lookup-ge
  "Ceiling (GE) lookup: returns the first stored element that is >= `key`,
  or nil when the set holds no such element.

  `cmp` is the comparator to search with; when it is nil the set's own
  comparator is used. `opts` may carry `:sync?` (default true), which is
  handed to `async+sync` to pick the execution mode.

  Visits one node per tree level on the way from the root to a leaf — O(log n)."
  [^BTSet set key cmp {:keys [sync?] :or {sync? true} :as opts}]
  (async+sync sync?
    (async
      (let [root   (await (-root set opts))
            cmp-fn (or cmp (.-comparator set))
            store  (.-storage set)]
        (loop [current root]
          (let [ks (.-keys current)
                n  (arrays/alength ks)]
            (if (pos? n)
              (let [i (binary-search-l cmp-fn ks (dec n) key)]
                (if (< i n)
                  (if (instance? Branch current)
                    ;; Branch: the answer lives in the subtree at slot i, keep descending.
                    (let [child (await (branch/child current store i opts))]
                      (recur child))
                    ;; Leaf: slot i holds the first key >= key.
                    (arrays/aget ks i))
                  ;; Search landed past the last key: nothing in this node qualifies.
                  nil))
              ;; Node without keys: nothing to return.
              nil)))))))
241+
219242
(defn measure
220243
"Get the aggregated statistics for the entire set."
221244
[^BTSet set {:keys [sync?] :or {sync? true} :as opts}]
@@ -247,11 +270,11 @@
247270

248271
(def ^:const EMPTY_PATH (js* "0n"))
249272

250-
(defn- bits-per-level [set]
273+
(defn- bits-per-level [^BTSet set]
251274
(let [bf (get (.-settings set) :branching-factor)]
252275
(Math/ceil (Math/log2 bf))))
253276

254-
(defn- max-len [set]
277+
(defn- max-len [^BTSet set]
255278
(get (.-settings set) :branching-factor))
256279

257280
(defn- min-len [set]
@@ -1383,7 +1406,7 @@
13831406
(assert (some? seq))
13841407
(if (fn? arg)
13851408
(seek seq key arg {:sync? true})
1386-
(seek seq key (.-comparator (.-set seq)) arg)))
1409+
(seek seq key (.-comparator ^BTSet (.-set ^js seq)) arg)))
13871410
([seq key cmp {:keys [sync?] :or {sync? true} :as opts}]
13881411
(assert (some? seq))
13891412
(assert (fn? cmp))

src-java/org/replikativ/persistent_sorted_set/ANode.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,34 @@ public int searchFirst(Key key, Comparator<Key> cmp) {
117117
return low;
118118
}
119119

120+
/**
121+
* Like searchFirst but starts from a known lower bound.
122+
* For forward cursors where we know the target is at or after startIdx.
123+
* Uses linear scan for small distances (<=8), binary search otherwise.
124+
*/
125+
public int searchFirstFrom(Key key, Comparator<Key> cmp, int startIdx) {
126+
int remaining = _len - startIdx;
127+
if (remaining <= 0) return _len;
128+
// Linear scan for small distances — avoids binary search overhead
129+
if (remaining <= 8) {
130+
for (int i = startIdx; i < _len; i++) {
131+
if (cmp.compare(_keys[i], key) >= 0) return i;
132+
}
133+
return _len;
134+
}
135+
// Binary search from startIdx
136+
int low = startIdx, high = _len;
137+
while (low < high) {
138+
int mid = (high + low) >>> 1;
139+
int d = cmp.compare(_keys[mid], key);
140+
if (d < 0)
141+
low = mid + 1;
142+
else
143+
high = mid;
144+
}
145+
return low;
146+
}
147+
120148
public int searchLast(Key key, Comparator<Key> cmp) {
121149
int low = 0, high = _len;
122150
while (low < high) {

src-java/org/replikativ/persistent_sorted_set/PersistentSortedSet.java

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -644,6 +644,210 @@ public Key lookup(Object key, Comparator<Key> cmp) {
644644
}
645645
}
646646

647+
/**
648+
* Look up the first element >= key (ceiling/GE lookup).
649+
* O(log n) with zero allocations — no Seq chain created.
650+
* Returns null if no element >= key exists.
651+
*
652+
* <p><b>Internal API</b> — subject to change without notice.
653+
* Not exposed in the public Clojure namespace.
654+
*/
655+
public Key lookupGE(Object key) {
656+
return lookupGE(key, _cmp);
657+
}
658+
659+
public Key lookupGE(Object key, Comparator<Key> cmp) {
660+
ANode<Key, Address> node = root();
661+
662+
if (node.len() == 0) {
663+
return null;
664+
}
665+
666+
while (true) {
667+
int idx = node.searchFirst((Key) key, cmp);
668+
if (idx >= node._len) {
669+
return null;
670+
}
671+
672+
if (node instanceof Branch) {
673+
node = ((Branch<Key, Address>) node).child(_storage, idx);
674+
} else {
675+
return node._keys[idx];
676+
}
677+
}
678+
}
679+
680+
/**
681+
* Mutable forward-only cursor for efficient sequential lookupGE.
682+
* For sorted lookup keys, amortized O(1) per lookup instead of O(log n).
683+
* Not thread-safe. Must only be used for forward (ascending) seeks.
684+
*
685+
* <p><b>Internal API</b> — this class is subject to change or removal
686+
* without notice. It is not exposed in the public Clojure namespace.
687+
* External consumers should use {@link #lookupGE} or {@code slice} instead.
688+
*
689+
* <p>Usage:
690+
* <pre>
691+
* ForwardCursor c = pss.forwardCursor();
692+
* Key result1 = c.seekGE(key1); // O(log n) — first seek
693+
* Key result2 = c.seekGE(key2); // O(1) if in same leaf, else O(siblings skipped)
694+
* </pre>
695+
*/
696+
public class ForwardCursor {
697+
private final Comparator<Key> _cursorCmp;
698+
private ANode<Key, Address> _leaf; // current leaf node
699+
private int _leafIdx; // current index within leaf
700+
// Stack for tree traversal (height levels)
701+
// For a tree of height H, we need H-1 branch levels above the leaf.
702+
private Branch<Key, Address>[] _branches;
703+
private int[] _branchIdxs;
704+
private int _depth; // number of branch levels (0 for leaf-only)
705+
706+
@SuppressWarnings("unchecked")
707+
ForwardCursor(Comparator<Key> cmp) {
708+
_cursorCmp = cmp;
709+
ANode<Key, Address> root = root();
710+
if (root.len() == 0) {
711+
_leaf = null;
712+
_depth = 0;
713+
return;
714+
}
715+
// Compute tree height
716+
int height = 0;
717+
ANode<Key, Address> n = root;
718+
while (n instanceof Branch) {
719+
height++;
720+
n = ((Branch<Key, Address>) n).child(_storage, 0);
721+
}
722+
_depth = height;
723+
_branches = new Branch[height];
724+
_branchIdxs = new int[height];
725+
// Position at start (leftmost leaf)
726+
n = root;
727+
for (int level = 0; level < height; level++) {
728+
_branches[level] = (Branch<Key, Address>) n;
729+
_branchIdxs[level] = 0;
730+
n = ((Branch<Key, Address>) n).child(_storage, 0);
731+
}
732+
_leaf = n;
733+
_leafIdx = 0;
734+
}
735+
736+
/**
737+
* Seek forward to first element >= key.
738+
* Keys MUST be passed in ascending order across calls.
739+
* Returns null if no element >= key exists.
740+
*/
741+
public Key seekGE(Key key) {
742+
if (_leaf == null) return null;
743+
744+
// Fast path: key is within current leaf
745+
if (_cursorCmp.compare(key, _leaf.maxKey()) <= 0) {
746+
// Search from current position — target is always >= _leafIdx since keys are ascending
747+
int idx = _leaf.searchFirstFrom(key, _cursorCmp, _leafIdx);
748+
if (idx < _leaf._len) {
749+
_leafIdx = idx;
750+
return _leaf._keys[idx];
751+
}
752+
}
753+
754+
// Need to advance to a later leaf.
755+
// Walk up the branch stack to find a branch that contains our key,
756+
// then walk back down.
757+
int level = _depth - 1; // start from immediate parent of leaf
758+
while (level >= 0) {
759+
Branch<Key, Address> branch = _branches[level];
760+
int bi = _branchIdxs[level] + 1; // advance past current child
761+
// Linear scan forward through siblings (amortized O(1))
762+
while (bi < branch._len) {
763+
if (_cursorCmp.compare(key, branch._keys[bi]) <= 0) {
764+
// key <= this child's maxKey, so answer is in this subtree
765+
_branchIdxs[level] = bi;
766+
// Walk down to leaf
767+
ANode<Key, Address> node = branch.child(_storage, bi);
768+
for (int d = level + 1; d < _depth; d++) {
769+
_branches[d] = (Branch<Key, Address>) node;
770+
int childIdx = node.searchFirst(key, _cursorCmp);
771+
if (childIdx >= node._len) childIdx = node._len - 1;
772+
_branchIdxs[d] = childIdx;
773+
node = ((Branch<Key, Address>) node).child(_storage, childIdx);
774+
}
775+
_leaf = node;
776+
int idx = _leaf.searchFirst(key, _cursorCmp);
777+
if (idx < _leaf._len) {
778+
_leafIdx = idx;
779+
return _leaf._keys[idx];
780+
}
781+
// Key exceeds this leaf — continue to next sibling at this level
782+
bi++;
783+
continue;
784+
}
785+
bi++;
786+
}
787+
level--; // go up one level
788+
}
789+
790+
// Exhausted all branches
791+
_leaf = null;
792+
return null;
793+
}
794+
795+
/**
796+
* Advance cursor to the next element and return it.
797+
* Returns null if no more elements exist.
798+
* O(1) within a leaf, amortized O(1) across leaves.
799+
*/
800+
public Key next() {
801+
if (_leaf == null) return null;
802+
_leafIdx++;
803+
if (_leafIdx < _leaf._len) {
804+
return _leaf._keys[_leafIdx];
805+
}
806+
// Need to advance to next leaf via branch stack
807+
for (int level = _depth - 1; level >= 0; level--) {
808+
int bi = _branchIdxs[level] + 1;
809+
if (bi < _branches[level]._len) {
810+
_branchIdxs[level] = bi;
811+
ANode<Key, Address> node = _branches[level].child(_storage, bi);
812+
for (int d = level + 1; d < _depth; d++) {
813+
_branches[d] = (Branch<Key, Address>) node;
814+
_branchIdxs[d] = 0;
815+
node = ((Branch<Key, Address>) node).child(_storage, 0);
816+
}
817+
_leaf = node;
818+
_leafIdx = 0;
819+
return _leaf._keys[0];
820+
}
821+
}
822+
_leaf = null;
823+
return null;
824+
}
825+
826+
/**
827+
* Return the current element without advancing.
828+
* Returns null if the cursor is exhausted or not yet positioned.
829+
*/
830+
public Key current() {
831+
if (_leaf == null || _leafIdx < 0 || _leafIdx >= _leaf._len) return null;
832+
return _leaf._keys[_leafIdx];
833+
}
834+
835+
}
836+
837+
/**
838+
* Create a forward cursor positioned at the start of the set.
839+
* Use for efficient sequential lookupGE with ascending keys.
840+
*
841+
* <p><b>Internal API</b> — subject to change without notice.
842+
*/
843+
public ForwardCursor forwardCursor() {
844+
return new ForwardCursor(_cmp);
845+
}
846+
847+
public ForwardCursor forwardCursor(Comparator<Key> cmp) {
848+
return new ForwardCursor(cmp);
849+
}
850+
647851
// IEditableCollection
648852
public PersistentSortedSet asTransient() {
649853
if (editable()) {

src-java/org/replikativ/persistent_sorted_set/Seq.java

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ public class Seq extends ASeq implements IReduce, Reversible, IChunkedSeq, ISeek
1313
final Comparator _cmp;
1414
final boolean _asc;
1515
final int _version;
16+
// True when the entire current leaf is within bounds — skip over() check
17+
boolean _leafSafe;
1618

1719
Seq(IPersistentMap meta, PersistentSortedSet set, Seq parent, ANode node, int idx, Object keyTo, Comparator cmp, boolean asc, int version) {
1820
super(meta);
@@ -24,6 +26,20 @@ public class Seq extends ASeq implements IReduce, Reversible, IChunkedSeq, ISeek
2426
_cmp = cmp;
2527
_asc = asc;
2628
_version = version;
29+
_leafSafe = computeLeafSafe();
30+
}
31+
32+
// Check if all remaining elements in the current leaf are within bounds.
33+
// Avoids per-element comparator calls during within-leaf iteration.
34+
boolean computeLeafSafe() {
35+
if (_keyTo == null || _cmp == null) return true;
36+
if (_asc) {
37+
// All elements up to leaf end are valid if maxKey <= _keyTo
38+
return _cmp.compare(_node.maxKey(), _keyTo) <= 0;
39+
} else {
40+
// All elements down to leaf start are valid if minKey >= _keyTo
41+
return _cmp.compare(_node.minKey(), _keyTo) >= 0;
42+
}
2743
}
2844

2945
void checkVersion() {
@@ -47,25 +63,27 @@ boolean advance() {
4763
if (_asc) {
4864
if (_idx < _node._len - 1) {
4965
_idx++;
50-
return !over();
66+
return _leafSafe || !over();
5167
} else if (_parent != null) {
5268
_parent = _parent.next();
5369
if (_parent != null) {
5470
_node = _parent.child();
5571
_idx = 0;
56-
return !over();
72+
_leafSafe = computeLeafSafe();
73+
return _leafSafe || !over();
5774
}
5875
}
5976
} else { // !_asc
6077
if (_idx > 0) {
6178
_idx--;
62-
return !over();
79+
return _leafSafe || !over();
6380
} else if (_parent != null) {
6481
_parent = _parent.next();
6582
if (_parent != null) {
6683
_node = _parent.child();
6784
_idx = _node._len - 1;
68-
return !over();
85+
_leafSafe = computeLeafSafe();
86+
return _leafSafe || !over();
6987
}
7088
}
7189
}

0 commit comments

Comments
 (0)