Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,10 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti
writeField(field, producer, -1, null);
}

private boolean shouldEncodeOrdinalRange(FieldInfo field, long maxOrd, int numDocsWithValue) {
private boolean shouldEncodeOrdinalRange(FieldInfo field, long maxOrd, int numDocsWithValue, long numValues) {
return maxDoc > 1
&& field.number == primarySortFieldNumber
&& numDocsWithValue == numValues // Only single valued fields can be supported with range encoded ordinals format
&& (numDocsWithValue / maxOrd) >= minDocsPerOrdinalForOrdinalRangeEncoding;
}

Expand Down Expand Up @@ -167,7 +168,8 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer,
if (maxOrd == 1) {
// Special case for maxOrd of 1, signal -1 that no blocks will be written
meta.writeInt(-1);
} else if (shouldEncodeOrdinalRange(field, maxOrd, numDocsWithValue)) {
} else if (shouldEncodeOrdinalRange(field, maxOrd, numDocsWithValue, numValues)) {
assert offsetsAccumulator == null;
// When a field is sorted, use ordinal range encode for long runs of the same ordinal.
meta.writeInt(-2);
meta.writeVInt(Math.toIntExact(maxOrd));
Expand All @@ -188,9 +190,6 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer,
if (disiAccumulator != null) {
disiAccumulator.addDocId(doc);
}
if (offsetsAccumulator != null) {
offsetsAccumulator.addDoc(1);
}
final long nextOrd = values.nextValue();
if (nextOrd != lastOrd) {
lastOrd = nextOrd;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1318,42 +1318,7 @@ public long longValue() throws IOException {
};
}
} else if (entry.sortedOrdinals != null) {
final var ordinalsReader = new SortedOrdinalReader(
maxOrd,
DirectMonotonicReader.getInstance(
entry.sortedOrdinals,
data.randomAccessSlice(entry.valuesOffset, entry.valuesLength),
true
)
);
if (entry.docsWithFieldOffset == -1) {
return new BaseDenseNumericValues(maxDoc) {
@Override
long lookAheadValueAt(int targetDoc) {
return ordinalsReader.lookAheadValue(targetDoc);
}

@Override
public long longValue() {
return ordinalsReader.readValueAndAdvance(doc);
}
};
} else {
final var disi = new IndexedDISI(
data,
entry.docsWithFieldOffset,
entry.docsWithFieldLength,
entry.jumpTableEntryCount,
entry.denseRankPower,
entry.numValues
);
return new BaseSparseNumericValues(disi) {
@Override
public long longValue() {
return ordinalsReader.readValueAndAdvance(disi.docID());
}
};
}
return getRangeEncodedNumericDocValues(entry, maxOrd);
}

// NOTE: we could make this a bit simpler by reusing #getValues but this
Expand Down Expand Up @@ -1596,6 +1561,41 @@ public BlockLoader.Block tryRead(
}
}

private NumericDocValues getRangeEncodedNumericDocValues(NumericEntry entry, long maxOrd) throws IOException {
final var ordinalsReader = new SortedOrdinalReader(
maxOrd,
DirectMonotonicReader.getInstance(entry.sortedOrdinals, data.randomAccessSlice(entry.valuesOffset, entry.valuesLength), true)
);
if (entry.docsWithFieldOffset == -1) {
return new BaseDenseNumericValues(maxDoc) {
@Override
long lookAheadValueAt(int targetDoc) {
return ordinalsReader.lookAheadValue(targetDoc);
}

@Override
public long longValue() {
return ordinalsReader.readValueAndAdvance(doc);
}
};
} else {
final var disi = new IndexedDISI(
data,
entry.docsWithFieldOffset,
entry.docsWithFieldLength,
entry.jumpTableEntryCount,
entry.denseRankPower,
entry.numValues
);
return new BaseSparseNumericValues(disi) {
@Override
public long longValue() {
return ordinalsReader.readValueAndAdvance(disi.docID());
}
};
}
}

private NumericValues getValues(NumericEntry entry, final long maxOrd) throws IOException {
assert entry.numValues > 0;
final RandomAccessInput indexSlice = data.randomAccessSlice(entry.indexOffset, entry.indexLength);
Expand Down Expand Up @@ -1638,6 +1638,14 @@ private SortedNumericDocValues getSortedNumeric(SortedNumericEntry entry, long m
final RandomAccessInput addressesInput = data.randomAccessSlice(entry.addressesOffset, entry.addressesLength);
final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesInput, merging);

assert entry.sortedOrdinals == null : "encoded ordinal range supports only one value per document";
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Invoking getValues(...) will result into a NPE if entry.sortedOrdinals != null.
Maybe we can return a singleton SortedNumericDocValues instance here, so that at least the the first value of every document is available?

if (entry.sortedOrdinals != null) {
// TODO: determine when this can be removed.
// This is for the clusters that ended up using ordinal range encoding for multi-values fields. Only first value can be
// returned.
NumericDocValues values = getRangeEncodedNumericDocValues(entry, maxOrd);
return DocValues.singleton(values);
}
final NumericValues values = getValues(entry, maxOrd);

if (entry.docsWithFieldOffset == -1) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.cluster.metadata.DataStream;
Expand Down Expand Up @@ -1302,6 +1303,79 @@ public int get(int docId) {
}
}

public void testEncodeRangeWithSortedSetPrimarySortField() throws Exception {
String timestampField = "@timestamp";
String hostnameField = "host.name";
long baseTimestamp = 1704067200000L;

var config = getTimeSeriesIndexWriterConfig(hostnameField, true, timestampField);
try (var dir = newDirectory(); var iw = new IndexWriter(dir, config)) {

int numDocs = 512 + random().nextInt(512);
int numHosts = numDocs / 20;

for (int i = 0; i < numDocs; i++) {
var d = new Document();
int batchIndex = i / numHosts;
{
String hostName = String.format(Locale.ROOT, "host-%03d", batchIndex);
d.add(new SortedSetDocValuesField(hostnameField, new BytesRef(hostName)));
}
{
String hostName = String.format(Locale.ROOT, "host-%03d", batchIndex + 1);
d.add(new SortedSetDocValuesField(hostnameField, new BytesRef(hostName)));
}
// Index sorting doesn't work with NumericDocValuesField:
long timestamp = baseTimestamp + (1000L * i);
d.add(new SortedNumericDocValuesField(timestampField, timestamp));
iw.addDocument(d);
if (i % 100 == 0) {
iw.commit();
}
}
iw.commit();
iw.forceMerge(1);

try (var reader = DirectoryReader.open(iw)) {
assertEquals(1, reader.leaves().size());
assertEquals(numDocs, reader.maxDoc());
var leaf = reader.leaves().get(0).reader();
var hostNameDV = leaf.getSortedSetDocValues(hostnameField);
assertNotNull(hostNameDV);
var timestampDV = DocValues.unwrapSingleton(leaf.getSortedNumericDocValues(timestampField));
assertNotNull(timestampDV);
for (int i = 0; i < numDocs; i++) {
assertEquals(i, hostNameDV.nextDoc());

int batchIndex = i / numHosts;
assertEquals(2, hostNameDV.docValueCount());

long firstOrd = hostNameDV.nextOrd();
assertEquals(batchIndex, firstOrd);
String expectedFirstHostName = String.format(Locale.ROOT, "host-%03d", batchIndex);
String actualFirstHostName = hostNameDV.lookupOrd(firstOrd).utf8ToString();
assertEquals(expectedFirstHostName, actualFirstHostName);

batchIndex++;
long secondOrd = hostNameDV.nextOrd();
assertEquals(batchIndex, secondOrd);
String expectedSecondHostName = String.format(Locale.ROOT, "host-%03d", batchIndex);
String actualSecondHostName = hostNameDV.lookupOrd(secondOrd).utf8ToString();
assertEquals(expectedSecondHostName, actualSecondHostName);

assertEquals(i, timestampDV.nextDoc());
long timestamp = timestampDV.longValue();
long lowerBound = baseTimestamp;
long upperBound = baseTimestamp + (1000L * numDocs);
assertTrue(
"unexpected timestamp [" + timestamp + "], expected between [" + lowerBound + "] and [" + upperBound + "]",
timestamp >= lowerBound && timestamp < upperBound
);
}
}
}
}

private static BaseDenseNumericValues getBaseDenseNumericValues(LeafReader leafReader, String field) throws IOException {
return (BaseDenseNumericValues) DocValues.unwrapSingleton(leafReader.getSortedNumericDocValues(field));
}
Expand All @@ -1315,11 +1389,15 @@ private static BaseSortedDocValues getBaseSortedDocValues(LeafReader leafReader,
}

private IndexWriterConfig getTimeSeriesIndexWriterConfig(String hostnameField, String timestampField) {
return getTimeSeriesIndexWriterConfig(hostnameField, false, timestampField);
}

private IndexWriterConfig getTimeSeriesIndexWriterConfig(String hostnameField, boolean multiValued, String timestampField) {
var config = new IndexWriterConfig();
if (hostnameField != null) {
config.setIndexSort(
new Sort(
new SortField(hostnameField, SortField.Type.STRING, false),
multiValued ? new SortedSetSortField(hostnameField, false) : new SortField(hostnameField, SortField.Type.STRING, false),
new SortedNumericSortField(timestampField, SortField.Type.LONG, true)
)
);
Expand Down