Skip to content

Commit 699eb2b

Browse files
authored
Use doc values skipper for _tsid in synthetic _id postings (#138568)
Instead of scanning all documents to find the first document that has a _tsid greater than, or equal to, a given ordinal, we can use a doc values skipper to skip as many documents as possible, and only then scan the remaining docs. When seeking a synthetic _id, we look up the _tsid ordinal, then use the DV skipper to find a starting doc ID, then scan each doc to find the first doc ID matching the exact _tsid ordinal. Then we finally scan the remaining docs to find the one matching the timestamp. Relates ES-13604
1 parent e344cf4 commit 699eb2b

File tree

3 files changed

+49
-15
lines changed

3 files changed

+49
-15
lines changed

docs/changelog/138568.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 138568
2+
summary: Use doc values skipper for `_tsid` in synthetic `_id` postings
3+
area: TSDB
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java

Lines changed: 40 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -280,22 +280,44 @@ private BytesRef lookupTsIdOrd(int tsIdOrdinal) throws IOException {
280280
}
281281

282282
/**
283-
* Scan all documents to find the first document that has a _tsid equal or greater than the provided _tsid ordinal, returning its
284-
* document ID. If no document is found, the method returns {@link DocIdSetIterator#NO_MORE_DOCS}.
283+
* Use a doc values skipper to find a starting document ID for the provided _tsid ordinal. The returned document ID might have the
284+
* exact _tsid ordinal provided, or a lower one.
285285
*
286-
* Warning: This method is very slow because it potentially scans all documents in the segment.
286+
* @param tsIdOrd the _tsid ordinal
287+
* @return a docID to start scanning documents from in order to find the first document ID matching the provided _tsid
288+
* @throws IOException if any I/O exception occurs
287289
*/
288-
private int slowScanToFirstDocWithTsIdOrdinalEqualOrGreaterThan(int tsIdOrd) throws IOException {
290+
private int findStartDocIDForTsIdOrd(int tsIdOrd) throws IOException {
291+
var skipper = docValuesProducer.getSkipper(tsIdFieldInfo);
292+
assert skipper != null;
293+
if (skipper.minValue() > tsIdOrd || tsIdOrd > skipper.maxValue()) {
294+
return DocIdSetIterator.NO_MORE_DOCS;
295+
}
296+
skipper.advance(tsIdOrd, Long.MAX_VALUE);
297+
return skipper.minDocID(0);
298+
}
299+
300+
/**
301+
* Find the first document that has a _tsid equal to or greater than the provided _tsid ordinal, returning its document ID. If no
302+
* document is found, the method returns {@link DocIdSetIterator#NO_MORE_DOCS}.
303+
*
304+
* Warning: This method can be slow because it potentially scans many documents in the segment.
305+
*/
306+
private int findFirstDocWithTsIdOrdinalEqualOrGreaterThan(int tsIdOrd) throws IOException {
307+
final int startDocId = findStartDocIDForTsIdOrd(tsIdOrd);
308+
if (startDocId == DocIdSetIterator.NO_MORE_DOCS) {
309+
return startDocId;
310+
}
289311
// recreate even if doc values are already on the same ordinal, to ensure the method returns the first doc
290-
if (tsIdDocValues == null || (cachedTsIdOrd != -1 && cachedTsIdOrd >= tsIdOrd)) {
312+
if (tsIdDocValues == null || (cachedTsIdOrd != -1 && cachedTsIdOrd >= tsIdOrd) || tsIdDocValues.docID() > startDocId) {
291313
tsIdDocValues = docValuesProducer.getSorted(tsIdFieldInfo);
292314
cachedTsIdOrd = -1;
293315
cachedTsId = null;
294316
}
295317
assert 0 <= tsIdOrd : tsIdOrd;
296318
assert tsIdOrd < tsIdDocValues.getValueCount() : tsIdOrd;
297319

298-
for (int docID = 0; docID != DocIdSetIterator.NO_MORE_DOCS; docID = tsIdDocValues.nextDoc()) {
320+
for (int docID = startDocId; docID != DocIdSetIterator.NO_MORE_DOCS; docID = tsIdDocValues.nextDoc()) {
299321
boolean found = tsIdDocValues.advanceExact(docID);
300322
assert found : "No value found for field [" + tsIdFieldInfo.getName() + " and docID " + docID;
301323
var ord = tsIdDocValues.ordValue();
@@ -313,22 +335,25 @@ private int slowScanToFirstDocWithTsIdOrdinalEqualOrGreaterThan(int tsIdOrd) thr
313335
}
314336

315337
/**
316-
* Scan all documents to find the first document that has a _tsid equal to the provided _tsid ordinal, returning its
317-
* document ID. If no document is found, the method returns {@link DocIdSetIterator#NO_MORE_DOCS}.
338+
* Find the first document that has a _tsid equal to the provided _tsid ordinal, returning its document ID. If no document is found,
339+
* the method returns {@link DocIdSetIterator#NO_MORE_DOCS}.
318340
*
319-
* Warning: This method is very slow because it potentially scans all documents in the segment.
341+
* Warning: This method can be slow because it potentially scans many documents in the segment.
320342
*/
321-
private int slowScanToFirstDocWithTsIdOrdinalEqualTo(int tsIdOrd) throws IOException {
343+
private int findFirstDocWithTsIdOrdinalEqualTo(int tsIdOrd) throws IOException {
344+
final int startDocId = findStartDocIDForTsIdOrd(tsIdOrd);
345+
assert startDocId != DocIdSetIterator.NO_MORE_DOCS : startDocId;
346+
322347
// recreate even if doc values are already on the same ordinal, to ensure the method returns the first doc
323-
if (tsIdDocValues == null || (cachedTsIdOrd != -1 && cachedTsIdOrd >= tsIdOrd)) {
348+
if (tsIdDocValues == null || (cachedTsIdOrd != -1 && cachedTsIdOrd >= tsIdOrd) || tsIdDocValues.docID() > startDocId) {
324349
tsIdDocValues = docValuesProducer.getSorted(tsIdFieldInfo);
325350
cachedTsIdOrd = -1;
326351
cachedTsId = null;
327352
}
328353
assert 0 <= tsIdOrd : tsIdOrd;
329354
assert tsIdOrd < tsIdDocValues.getValueCount() : tsIdOrd;
330355

331-
for (int docID = 0; docID != DocIdSetIterator.NO_MORE_DOCS; docID = tsIdDocValues.nextDoc()) {
356+
for (int docID = startDocId; docID != DocIdSetIterator.NO_MORE_DOCS; docID = tsIdDocValues.nextDoc()) {
332357
boolean found = tsIdDocValues.advanceExact(docID);
333358
assert found : "No value found for field [" + tsIdFieldInfo.getName() + " and docID " + docID;
334359
var ord = tsIdDocValues.ordValue();
@@ -441,7 +466,7 @@ public SeekStatus seekCeil(BytesRef id) throws IOException {
441466
tsIdOrd = -tsIdOrd - 1;
442467
// set the terms enum on the first non-matching document
443468
if (tsIdOrd < docValues.getTsIdValueCount()) {
444-
int docID = docValues.slowScanToFirstDocWithTsIdOrdinalEqualOrGreaterThan(tsIdOrd);
469+
int docID = docValues.findFirstDocWithTsIdOrdinalEqualOrGreaterThan(tsIdOrd);
445470
if (docID != DocIdSetIterator.NO_MORE_DOCS) {
446471
current = new SyntheticTerm(
447472
docID,
@@ -461,8 +486,8 @@ public SeekStatus seekCeil(BytesRef id) throws IOException {
461486
// _tsid found, extract the timestamp
462487
final long timestamp = TsidExtractingIdFieldMapper.extractTimestampFromSyntheticId(id);
463488

464-
// Slow scan to the first document matching the _tsid
465-
final int startDocID = docValues.slowScanToFirstDocWithTsIdOrdinalEqualTo(tsIdOrd);
489+
// Find the first document matching the _tsid
490+
final int startDocID = docValues.findFirstDocWithTsIdOrdinalEqualTo(tsIdOrd);
466491
assert startDocID >= 0 : startDocID;
467492

468493
int docID = startDocID;

server/src/main/java/org/elasticsearch/index/mapper/ParsedDocument.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ public static ParsedDocument noopTombstone(SeqNoFieldMapper.SeqNoIndexOptions se
7575
* The returned document consists only of _uid, _seqno, _term and _version fields; other metadata fields are excluded.
7676
* @param id the id of the deleted document
7777
*/
78+
// used by tests
7879
public static ParsedDocument deleteTombstone(SeqNoFieldMapper.SeqNoIndexOptions seqNoIndexOptions, String id) {
7980
return deleteTombstone(seqNoIndexOptions, false /* ignored */, false, id, null /* ignored */);
8081
}
@@ -101,6 +102,9 @@ public static ParsedDocument deleteTombstone(
101102
// Use a synthetic _id field which is not indexed nor stored
102103
document.add(IdFieldMapper.syntheticIdField(id));
103104

105+
// Add doc values fields that are used to synthesize the synthetic _id.
106+
// Note: It is not strictly required for tombstone documents but we decided to add them so that iterating and seeking synthetic
107+
// _id terms over tombstones also work as if a regular _id field was present.
104108
var timeSeriesId = TsidExtractingIdFieldMapper.extractTimeSeriesIdFromSyntheticId(uid);
105109
var timestamp = TsidExtractingIdFieldMapper.extractTimestampFromSyntheticId(uid);
106110
var routingHash = TsidExtractingIdFieldMapper.extractRoutingHashBytesFromSyntheticId(uid);

0 commit comments

Comments
 (0)