|
53 | 53 |
|
54 | 54 | final class ES819TSDBDocValuesProducer extends DocValuesProducer { |
55 | 55 | final IntObjectHashMap<NumericEntry> numerics; |
| 56 | + private int primarySortFieldNumber = -1; |
56 | 57 | final IntObjectHashMap<BinaryEntry> binaries; |
57 | 58 | final IntObjectHashMap<SortedEntry> sorted; |
58 | 59 | final IntObjectHashMap<SortedSetEntry> sortedSets; |
@@ -91,7 +92,11 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { |
91 | 92 | ); |
92 | 93 |
|
93 | 94 | readFields(in, state.fieldInfos); |
94 | | - |
| 95 | + final var indexSort = state.segmentInfo.getIndexSort(); |
| 96 | + if (indexSort != null) { |
| 97 | + var primaryField = indexSort.getSort()[0]; |
| 98 | + primarySortFieldNumber = state.fieldInfos.fieldInfo(primaryField.getField()).number; |
| 99 | + } |
95 | 100 | } catch (Throwable exception) { |
96 | 101 | priorE = exception; |
97 | 102 | } finally { |
@@ -333,10 +338,10 @@ public boolean advanceExact(int target) throws IOException { |
333 | 338 | @Override |
334 | 339 | public SortedDocValues getSorted(FieldInfo field) throws IOException { |
335 | 340 | SortedEntry entry = sorted.get(field.number); |
336 | | - return getSorted(entry); |
| 341 | + return getSorted(entry, field.number == primarySortFieldNumber); |
337 | 342 | } |
338 | 343 |
|
339 | | - private SortedDocValues getSorted(SortedEntry entry) throws IOException { |
| 344 | + private SortedDocValues getSorted(SortedEntry entry, boolean valuesSorted) throws IOException { |
340 | 345 | final NumericDocValues ords = getNumeric(entry.ordsEntry, entry.termsDictEntry.termsDictSize); |
341 | 346 | return new BaseSortedDocValues(entry) { |
342 | 347 |
|
@@ -369,10 +374,29 @@ public int advance(int target) throws IOException { |
369 | 374 | public long cost() { |
370 | 375 | return ords.cost(); |
371 | 376 | } |
| 377 | + |
| 378 | + @Override |
| 379 | + public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException { |
| 380 | + if (valuesSorted && ords instanceof BaseDenseNumericValues denseOrds) { |
| 381 | + int firstDoc = docs.get(offset); |
| 382 | + denseOrds.advanceExact(firstDoc); |
| 383 | + long startValue = denseOrds.longValue(); |
| 384 | + final int docCount = docs.count(); |
| 385 | + int lastDoc = docs.get(docCount - 1); |
| 386 | + long lastValue = denseOrds.lookAheadValueAt(lastDoc); |
| 387 | + if (lastValue == startValue) { |
| 388 | + BytesRef b = lookupOrd(Math.toIntExact(startValue)); |
| 389 | + return factory.constantBytes(BytesRef.deepCopyOf(b), docCount - offset); |
| 390 | + } |
| 391 | + // TODO: Since ordinals are sorted, start at 0 (offset by startValue), scan until lastValue, |
| 392 | + // then fill remaining positions with lastValue. |
| 393 | + } |
| 394 | + return null; |
| 395 | + } |
372 | 396 | }; |
373 | 397 | } |
374 | 398 |
|
375 | | - abstract class BaseSortedDocValues extends SortedDocValues { |
| 399 | + abstract class BaseSortedDocValues extends SortedDocValues implements BlockLoader.OptionalColumnAtATimeReader { |
376 | 400 |
|
377 | 401 | final SortedEntry entry; |
378 | 402 | final TermsEnum termsEnum; |
@@ -406,6 +430,15 @@ public int lookupTerm(BytesRef key) throws IOException { |
406 | 430 | public TermsEnum termsEnum() throws IOException { |
407 | 431 | return new TermsDict(entry.termsDictEntry, data, merging); |
408 | 432 | } |
| 433 | + |
| 434 | + @Override |
| 435 | + public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException { |
| 436 | + return null; |
| 437 | + } |
| 438 | + } |
| 439 | + |
| 440 | + abstract static class BaseDenseNumericValues extends NumericDocValues implements BlockLoader.OptionalColumnAtATimeReader { |
| 441 | + abstract long lookAheadValueAt(int targetDoc) throws IOException; |
409 | 442 | } |
410 | 443 |
|
411 | 444 | abstract static class BaseSortedSetDocValues extends SortedSetDocValues { |
@@ -695,7 +728,7 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti |
695 | 728 | public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { |
696 | 729 | SortedSetEntry entry = sortedSets.get(field.number); |
697 | 730 | if (entry.singleValueEntry != null) { |
698 | | - return DocValues.singleton(getSorted(entry.singleValueEntry)); |
| 731 | + return DocValues.singleton(getSorted(entry.singleValueEntry, field.number == primarySortFieldNumber)); |
699 | 732 | } |
700 | 733 |
|
701 | 734 | SortedNumericEntry ordsEntry = entry.ordsEntry; |
@@ -1141,13 +1174,17 @@ public long longValue() { |
1141 | 1174 | final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1; |
1142 | 1175 | if (entry.docsWithFieldOffset == -1) { |
1143 | 1176 | // dense |
1144 | | - return new BulkNumericDocValues() { |
| 1177 | + return new BaseDenseNumericValues() { |
1145 | 1178 |
|
1146 | 1179 | private final int maxDoc = ES819TSDBDocValuesProducer.this.maxDoc; |
1147 | 1180 | private int doc = -1; |
1148 | 1181 | private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE); |
1149 | 1182 | private long currentBlockIndex = -1; |
1150 | 1183 | private final long[] currentBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; |
| 1184 | + // lookahead block |
| 1185 | + private long lookaheadBlockIndex = -1; |
| 1186 | + private long[] lookaheadBlock; |
| 1187 | + private IndexInput lookaheadData = null; |
1151 | 1188 |
|
1152 | 1189 | @Override |
1153 | 1190 | public int docID() { |
@@ -1183,24 +1220,28 @@ public long longValue() throws IOException { |
1183 | 1220 | final int index = doc; |
1184 | 1221 | final int blockIndex = index >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT; |
1185 | 1222 | final int blockInIndex = index & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK; |
1186 | | - if (blockIndex != currentBlockIndex) { |
1187 | | - assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex; |
1188 | | - // no need to seek if the loading block is the next block |
1189 | | - if (currentBlockIndex + 1 != blockIndex) { |
1190 | | - valuesData.seek(indexReader.get(blockIndex)); |
1191 | | - } |
1192 | | - currentBlockIndex = blockIndex; |
1193 | | - if (maxOrd >= 0) { |
1194 | | - decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd); |
1195 | | - } else { |
1196 | | - decoder.decode(valuesData, currentBlock); |
1197 | | - } |
| 1223 | + if (blockIndex == currentBlockIndex) { |
| 1224 | + return currentBlock[blockInIndex]; |
| 1225 | + } |
| 1226 | + if (blockIndex == lookaheadBlockIndex) { |
| 1227 | + return lookaheadBlock[blockInIndex]; |
| 1228 | + } |
| 1229 | + assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex; |
| 1230 | + // no need to seek if the loading block is the next block |
| 1231 | + if (currentBlockIndex + 1 != blockIndex) { |
| 1232 | + valuesData.seek(indexReader.get(blockIndex)); |
| 1233 | + } |
| 1234 | + currentBlockIndex = blockIndex; |
| 1235 | + if (maxOrd >= 0) { |
| 1236 | + decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd); |
| 1237 | + } else { |
| 1238 | + decoder.decode(valuesData, currentBlock); |
1198 | 1239 | } |
1199 | 1240 | return currentBlock[blockInIndex]; |
1200 | 1241 | } |
1201 | 1242 |
|
1202 | 1243 | @Override |
1203 | | - public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException { |
| 1244 | + public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException { |
1204 | 1245 | assert maxOrd == -1 : "unexpected maxOrd[" + maxOrd + "]"; |
1205 | 1246 | final int docsCount = docs.count(); |
1206 | 1247 | doc = docs.get(docsCount - 1); |
@@ -1238,6 +1279,32 @@ public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs |
1238 | 1279 | } |
1239 | 1280 | } |
1240 | 1281 |
|
| 1282 | + @Override |
| 1283 | + long lookAheadValueAt(int targetDoc) throws IOException { |
| 1284 | + final int blockIndex = targetDoc >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT; |
| 1285 | + final int valueIndex = targetDoc & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK; |
| 1286 | + if (blockIndex == currentBlockIndex) { |
| 1287 | + return currentBlock[valueIndex]; |
| 1288 | + } |
| 1289 | + // load data to the lookahead block |
| 1290 | + if (lookaheadBlockIndex != blockIndex) { |
| 1291 | + if (lookaheadBlock == null) { |
| 1292 | + lookaheadBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; |
| 1293 | + lookaheadData = data.slice("look_ahead_values", entry.valuesOffset, entry.valuesLength); |
| 1294 | + } |
| 1295 | + if (lookaheadBlockIndex + 1 != blockIndex) { |
| 1296 | + lookaheadData.seek(indexReader.get(blockIndex)); |
| 1297 | + } |
| 1298 | + if (maxOrd >= 0) { |
| 1299 | + decoder.decodeOrdinals(lookaheadData, lookaheadBlock, bitsPerOrd); |
| 1300 | + } else { |
| 1301 | + decoder.decode(lookaheadData, lookaheadBlock); |
| 1302 | + } |
| 1303 | + lookaheadBlockIndex = blockIndex; |
| 1304 | + } |
| 1305 | + return lookaheadBlock[valueIndex]; |
| 1306 | + } |
| 1307 | + |
1241 | 1308 | static boolean isDense(int firstDocId, int lastDocId, int length) { |
1242 | 1309 | // This does not detect duplicate docids (e.g [1, 1, 2, 4] would be detected as dense), |
1243 | 1310 | // this can happen with enrich or lookup. However this codec isn't used for enrich / lookup. |
|
0 commit comments