@@ -44,6 +44,8 @@ public class BKDReader extends PointValues {
44
44
final long minLeafBlockFP ;
45
45
46
46
final IndexInput packedIndex ;
47
+ // if true, the tree is a legacy balanced tree
48
+ private final boolean isTreeBalanced ;
47
49
48
50
/**
49
51
* Caller must pre-seek the provided {@link IndexInput} to the index location that {@link
@@ -105,6 +107,52 @@ public BKDReader(IndexInput metaIn, IndexInput indexIn, IndexInput dataIn) throw
105
107
}
106
108
this .packedIndex = indexIn .slice ("packedIndex" , indexStartPointer , numIndexBytes );
107
109
this .in = dataIn ;
110
+ // with only one leaf, balanced and unbalanced trees are handled the same way,
111
+ // so we treat the tree as unbalanced and skip the isTreeBalanced() check.
112
+ this .isTreeBalanced = numLeaves != 1 && isTreeBalanced ();
113
+ }
114
+
115
+ private boolean isTreeBalanced () throws IOException { // true if this index is a legacy fully balanced tree
116
+ if (version >= BKDWriter .VERSION_META_FILE ) {
117
+ // since lucene 8.6 (VERSION_META_FILE) all trees are written unbalanced.
118
+ return false ;
119
+ }
120
+ if (config .numDims > 1 ) {
121
+ // high dimensional trees in pre-8.6 indices are always balanced; the assert checks the leaf count (presumably for a power of two, TODO confirm MathUtil.log semantics).
122
+ assert 1 << MathUtil .log (numLeaves , 2 ) == numLeaves ;
123
+ return true ;
124
+ }
125
+ if (1 << MathUtil .log (numLeaves , 2 ) != numLeaves ) {
126
+ // if there are not enough leaves to fill the last level, the tree cannot be balanced
127
+ return false ;
128
+ }
129
+ // number of points the last leaf is expected to hold if the tree is unbalanced
130
+ final int lastLeafNodePointCount = Math .toIntExact (pointCount % config .maxPointsInLeafNode );
131
+ // navigate to the last leaf: exhaust moveToSibling() at each level, then descend until moveToChild() fails
132
+ PointTree pointTree = getPointTree ();
133
+ do {
134
+ while (pointTree .moveToSibling ()) {}
135
+ } while (pointTree .moveToChild ());
136
+ // count the doc IDs reported for that node; a one-element array lets the anonymous visitor mutate the counter
137
+ final int [] count = new int [] {0 };
138
+ pointTree .visitDocIDs (
139
+ new IntersectVisitor () {
140
+ @ Override
141
+ public void visit (int docID ) {
142
+ count [0 ]++;
143
+ }
144
+
145
+ @ Override
146
+ public void visit (int docID , byte [] packedValue ) {
147
+ throw new AssertionError (); // not expected here: only visit(int) is used for counting
148
+ }
149
+
150
+ @ Override
151
+ public Relation compare (byte [] minPackedValue , byte [] maxPackedValue ) {
152
+ throw new AssertionError (); // not expected here: no value comparison is needed to count doc IDs
153
+ }
154
+ });
155
+ return count [0 ] != lastLeafNodePointCount ; // a mismatch with the unbalanced expectation means the tree is balanced
108
156
}
109
157
110
158
@ Override
@@ -117,7 +165,8 @@ public PointTree getPointTree() throws IOException {
117
165
version ,
118
166
pointCount ,
119
167
minPackedValue ,
120
- maxPackedValue );
168
+ maxPackedValue ,
169
+ isTreeBalanced );
121
170
}
122
171
123
172
private static class BKDPointTree implements PointTree {
@@ -168,6 +217,8 @@ private static class BKDPointTree implements PointTree {
168
217
scratchMaxIndexPackedValue ;
169
218
private final int [] commonPrefixLengths ;
170
219
private final BKDReaderDocIDSetIterator scratchIterator ;
220
+ // if true the tree is balanced, otherwise unbalanced
221
+ private final boolean isTreeBalanced ;
171
222
172
223
private BKDPointTree (
173
224
IndexInput innerNodes ,
@@ -177,7 +228,8 @@ private BKDPointTree(
177
228
int version ,
178
229
long pointCount ,
179
230
byte [] minPackedValue ,
180
- byte [] maxPackedValue )
231
+ byte [] maxPackedValue ,
232
+ boolean isTreeBalanced )
181
233
throws IOException {
182
234
this (
183
235
innerNodes ,
@@ -194,7 +246,8 @@ private BKDPointTree(
194
246
new byte [config .packedBytesLength ],
195
247
new byte [config .packedIndexBytesLength ],
196
248
new byte [config .packedIndexBytesLength ],
197
- new int [config .numDims ]);
249
+ new int [config .numDims ],
250
+ isTreeBalanced );
198
251
// read root node
199
252
readNodeData (false );
200
253
}
@@ -214,12 +267,14 @@ private BKDPointTree(
214
267
byte [] scratchDataPackedValue ,
215
268
byte [] scratchMinIndexPackedValue ,
216
269
byte [] scratchMaxIndexPackedValue ,
217
- int [] commonPrefixLengths ) {
270
+ int [] commonPrefixLengths ,
271
+ boolean isTreeBalanced ) {
218
272
this .config = config ;
219
273
this .version = version ;
220
274
this .nodeID = nodeID ;
221
275
this .nodeRoot = nodeID ;
222
276
this .level = level ;
277
+ this .isTreeBalanced = isTreeBalanced ;
223
278
leafNodeOffset = numLeaves ;
224
279
this .innerNodes = innerNodes ;
225
280
this .leafNodes = leafNodes ;
@@ -268,7 +323,8 @@ public PointTree clone() {
268
323
scratchDataPackedValue ,
269
324
scratchMinIndexPackedValue ,
270
325
scratchMaxIndexPackedValue ,
271
- commonPrefixLengths );
326
+ commonPrefixLengths ,
327
+ isTreeBalanced );
272
328
index .leafBlockFPStack [index .level ] = leafBlockFPStack [level ];
273
329
if (isLeafNode () == false ) {
274
330
// copy node data
@@ -452,8 +508,8 @@ public long size() {
452
508
numLeaves = rightMostLeafNode - leftMostLeafNode + 1 + leafNodeOffset ;
453
509
}
454
510
assert numLeaves == getNumLeavesSlow (nodeID ) : numLeaves + " " + getNumLeavesSlow (nodeID );
455
- if (version < BKDWriter . VERSION_META_FILE && config . numDims > 1 ) {
456
- // before lucene 8.6, high dimensional trees were constructed as fully balanced trees.
511
+ if (isTreeBalanced ) {
512
+ // before lucene 8.6, trees might have been constructed as fully balanced trees.
457
513
return sizeFromBalancedTree (leftMostLeafNode , rightMostLeafNode );
458
514
}
459
515
// size for an unbalanced tree.
0 commit comments