Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,10 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti
writeField(field, producer, -1, null);
}

private boolean shouldEncodeOrdinalRange(FieldInfo field, long maxOrd, int numDocsWithValue) {
private boolean shouldEncodeOrdinalRange(FieldInfo field, long maxOrd, int numDocsWithValue, long numValues) {
return maxDoc > 1
&& field.number == primarySortFieldNumber
&& numDocsWithValue == numValues // Only single valued fields can be supported with range encoded ordinals format
&& (numDocsWithValue / maxOrd) >= minDocsPerOrdinalForOrdinalRangeEncoding;
}

Expand Down Expand Up @@ -167,7 +168,7 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer,
if (maxOrd == 1) {
// Special case for maxOrd of 1, signal -1 that no blocks will be written
meta.writeInt(-1);
} else if (shouldEncodeOrdinalRange(field, maxOrd, numDocsWithValue)) {
} else if (shouldEncodeOrdinalRange(field, maxOrd, numDocsWithValue, numValues)) {
// When a field is sorted, use ordinal range encode for long runs of the same ordinal.
meta.writeInt(-2);
meta.writeVInt(Math.toIntExact(maxOrd));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1638,6 +1638,7 @@ private SortedNumericDocValues getSortedNumeric(SortedNumericEntry entry, long m
final RandomAccessInput addressesInput = data.randomAccessSlice(entry.addressesOffset, entry.addressesLength);
final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesInput, merging);

assert entry.sortedOrdinals == null : "encoded ordinal range supports only one value per document";
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Invoking getValues(...) will result into a NPE if entry.sortedOrdinals != null.
Maybe we can return a singleton SortedNumericDocValues instance here, so that at least the the first value of every document is available?

final NumericValues values = getValues(entry, maxOrd);

if (entry.docsWithFieldOffset == -1) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.cluster.metadata.DataStream;
Expand Down Expand Up @@ -1302,6 +1303,79 @@ public int get(int docId) {
}
}

public void testEncodeRangeWithSortedSetPrimarySortField() throws Exception {
String timestampField = "@timestamp";
String hostnameField = "host.name";
long baseTimestamp = 1704067200000L;

var config = getTimeSeriesIndexWriterConfig(hostnameField, true, timestampField);
try (var dir = newDirectory(); var iw = new IndexWriter(dir, config)) {

int numDocs = 512 + random().nextInt(512);
int numHosts = numDocs / 20;

for (int i = 0; i < numDocs; i++) {
var d = new Document();
int batchIndex = i / numHosts;
{
String hostName = String.format(Locale.ROOT, "host-%03d", batchIndex);
d.add(new SortedSetDocValuesField(hostnameField, new BytesRef(hostName)));
}
{
String hostName = String.format(Locale.ROOT, "host-%03d", batchIndex + 1);
d.add(new SortedSetDocValuesField(hostnameField, new BytesRef(hostName)));
}
// Index sorting doesn't work with NumericDocValuesField:
long timestamp = baseTimestamp + (1000L * i);
d.add(new SortedNumericDocValuesField(timestampField, timestamp));
iw.addDocument(d);
if (i % 100 == 0) {
iw.commit();
}
}
iw.commit();
iw.forceMerge(1);

try (var reader = DirectoryReader.open(iw)) {
assertEquals(1, reader.leaves().size());
assertEquals(numDocs, reader.maxDoc());
var leaf = reader.leaves().get(0).reader();
var hostNameDV = leaf.getSortedSetDocValues(hostnameField);
assertNotNull(hostNameDV);
var timestampDV = DocValues.unwrapSingleton(leaf.getSortedNumericDocValues(timestampField));
assertNotNull(timestampDV);
for (int i = 0; i < numDocs; i++) {
assertEquals(i, hostNameDV.nextDoc());

int batchIndex = i / numHosts;
assertEquals(2, hostNameDV.docValueCount());

long firstOrd = hostNameDV.nextOrd();
assertEquals(batchIndex, firstOrd);
String expectedFirstHostName = String.format(Locale.ROOT, "host-%03d", batchIndex);
String actualFirstHostName = hostNameDV.lookupOrd(firstOrd).utf8ToString();
assertEquals(expectedFirstHostName, actualFirstHostName);

batchIndex++;
long secondOrd = hostNameDV.nextOrd();
assertEquals(batchIndex, secondOrd);
String expectedSecondHostName = String.format(Locale.ROOT, "host-%03d", batchIndex);
String actualSecondHostName = hostNameDV.lookupOrd(secondOrd).utf8ToString();
assertEquals(expectedSecondHostName, actualSecondHostName);

assertEquals(i, timestampDV.nextDoc());
long timestamp = timestampDV.longValue();
long lowerBound = baseTimestamp;
long upperBound = baseTimestamp + (1000L * numDocs);
assertTrue(
"unexpected timestamp [" + timestamp + "], expected between [" + lowerBound + "] and [" + upperBound + "]",
timestamp >= lowerBound && timestamp < upperBound
);
}
}
}
}

private static BaseDenseNumericValues getBaseDenseNumericValues(LeafReader leafReader, String field) throws IOException {
return (BaseDenseNumericValues) DocValues.unwrapSingleton(leafReader.getSortedNumericDocValues(field));
}
Expand All @@ -1315,11 +1389,15 @@ private static BaseSortedDocValues getBaseSortedDocValues(LeafReader leafReader,
}

private IndexWriterConfig getTimeSeriesIndexWriterConfig(String hostnameField, String timestampField) {
return getTimeSeriesIndexWriterConfig(hostnameField, false, timestampField);
}

private IndexWriterConfig getTimeSeriesIndexWriterConfig(String hostnameField, boolean multiValued, String timestampField) {
var config = new IndexWriterConfig();
if (hostnameField != null) {
config.setIndexSort(
new Sort(
new SortField(hostnameField, SortField.Type.STRING, false),
multiValued ? new SortedSetSortField(hostnameField, false) : new SortField(hostnameField, SortField.Type.STRING, false),
new SortedNumericSortField(timestampField, SortField.Type.LONG, true)
)
);
Expand Down