ES819TSDBDocValuesProducer.java
@@ -1085,7 +1085,7 @@ private NumericDocValues getNumeric(NumericEntry entry, long maxOrd) throws IOException
// Special case for maxOrd 1, no need to read blocks and use ordinal 0 as only value
if (entry.docsWithFieldOffset == -1) {
// Special case when all docs have a value
return new NumericDocValues() {
return new BaseDenseNumericValues() {

private final int maxDoc = ES819TSDBDocValuesProducer.this.maxDoc;
private int doc = -1;
@@ -1124,6 +1124,17 @@ public boolean advanceExact(int target) {
public long cost() {
return maxDoc;
}

@Override
long lookAheadValueAt(int targetDoc) throws IOException {
return 0L; // Only one ordinal!
Member: easy one :)

Member Author: the test found it :)
}

@Override
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset)
throws IOException {
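// null signals that an optimized column-at-a-time read is not available for this reader.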
return null;
}
};
} else {
final IndexedDISI disi = new IndexedDISI(
ES819TSDBDocValuesFormatTests.java
@@ -24,30 +24,40 @@
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.cluster.metadata.DataStream;
import org.elasticsearch.common.Randomness;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec;
import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests;
import org.elasticsearch.index.mapper.BlockLoader;
import org.elasticsearch.index.mapper.TestBlock;
import org.elasticsearch.test.ESTestCase;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.function.Supplier;
import java.util.stream.IntStream;

import static org.hamcrest.Matchers.equalTo;

public class ES819TSDBDocValuesFormatTests extends ES87TSDBDocValuesFormatTests {

private final Codec codec = new Elasticsearch900Lucene101Codec() {
@@ -971,6 +981,117 @@ public void testBulkLoadingWithSparseDocs() throws Exception {
}
}

public void testLoadKeywordFieldWithIndexSorts() throws IOException {
String primaryField = "sorted_first";
String secondField = "sorted_second";
String unsortedField = "no_sort";
String sparseField = "sparse";
var config = new IndexWriterConfig();
config.setIndexSort(new Sort(new SortField(primaryField, SortField.Type.STRING, false)));
config.setMergePolicy(new LogByteSizeMergePolicy());
config.setCodec(getCodec());
Map<Integer, String> hostnames = new HashMap<>();
try (Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, config)) {
int numDocs = ESTestCase.randomIntBetween(100, 5000);
for (int i = 0; i < numDocs; i++) {
hostnames.put(i, "h" + random().nextInt(10));
}
List<Integer> ids = new ArrayList<>(hostnames.keySet());
Randomness.shuffle(ids);
Set<Integer> sparseIds = new HashSet<>(ESTestCase.randomSubsetOf(ESTestCase.between(1, ids.size() / 2), ids));
for (Integer id : ids) {
var d = new Document();
String hostname = hostnames.get(id);
d.add(new NumericDocValuesField("id", id));
d.add(new SortedDocValuesField(primaryField, new BytesRef(hostname)));
d.add(new SortedDocValuesField(secondField, new BytesRef(hostname)));
d.add(new SortedDocValuesField(unsortedField, new BytesRef(hostname)));
if (sparseIds.contains(id)) {
d.add(new SortedDocValuesField(sparseField, new BytesRef(hostname)));
}
writer.addDocument(d);
if (random().nextInt(100) < 10) {
writer.flush();
}
}
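// Verify twice: once against the segments as flushed and once after the index has been force-merged.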
for (int iter = 0; iter < 2; iter++) {
var factory = TestBlock.factory();
try (DirectoryReader reader = DirectoryReader.open(writer)) {
for (LeafReaderContext leaf : reader.leaves()) {
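// Docs view covering every document in this leaf, in docid order.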
BlockLoader.Docs docs = new BlockLoader.Docs() {
@Override
public int count() {
return leaf.reader().maxDoc();
}

@Override
public int get(int i) {
return i;
}
};
var idReader = ESTestCase.asInstanceOf(
BlockLoader.OptionalColumnAtATimeReader.class,
leaf.reader().getNumericDocValues("id")
);
TestBlock idBlock = (TestBlock) idReader.tryRead(factory, docs, 0);
assertNotNull(idBlock);
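// Only the field used as the primary index sort supports the optimized read; the other sorted fields are expected to return null.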
var reader2 = ESTestCase.asInstanceOf(
BlockLoader.OptionalColumnAtATimeReader.class,
leaf.reader().getSortedDocValues(secondField)
);
assertNull(reader2.tryRead(factory, docs, 0));
var reader3 = ESTestCase.asInstanceOf(
BlockLoader.OptionalColumnAtATimeReader.class,
leaf.reader().getSortedDocValues(unsortedField)
);
assertNull(reader3.tryRead(factory, docs, 0));
for (int offset = 0; offset < idBlock.size(); offset += ESTestCase.between(1, numDocs)) {
int start = offset;
var reader1 = ESTestCase.asInstanceOf(
BlockLoader.OptionalColumnAtATimeReader.class,
leaf.reader().getSortedDocValues(primaryField)
);
while (start < idBlock.size()) {
int end = start + random().nextInt(idBlock.size() - start);
TestBlock hostBlock = (TestBlock) reader1.tryRead(factory, new BlockLoader.Docs() {
@Override
public int count() {
return end + 1;
}

@Override
public int get(int docId) {
return docId;
}
}, start);
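// The optimized read should only produce a block when every doc in [start, end] has the same hostname; otherwise tryRead must return null.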
Set<String> seenValues = new HashSet<>();
for (int p = start; p <= end; p++) {
String hostName = hostnames.get(((Number) idBlock.get(p)).intValue());
seenValues.add(hostName);
}
if (seenValues.size() == 1) {
assertNotNull(hostBlock);
assertThat(hostBlock.size(), equalTo(end - start + 1));
for (int i = 0; i < hostBlock.size(); i++) {
String actualHostName = BytesRefs.toString(hostBlock.get(i));
assertThat(actualHostName, equalTo(hostnames.get(((Number) idBlock.get(i + start)).intValue())));
}
} else {
assertNull(hostBlock);
}
if (start == idBlock.size() - 1) {
break;
}
start = end + ESTestCase.between(0, 10);
}
}
writer.forceMerge(1);
}
}
}
}
}

private static ES819TSDBDocValuesProducer.BaseDenseNumericValues getColumnAtTimeReader(LeafReader leafReader, String counterField)
throws IOException {
return (ES819TSDBDocValuesProducer.BaseDenseNumericValues) DocValues.unwrapSingleton(
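For reference, below is a minimal sketch (not part of this change) of how a caller might consume the optional column-at-a-time read path exercised by the test above. It assumes the BlockLoader.OptionalColumnAtATimeReader interface and the tryRead(BlockLoader.BlockFactory, BlockLoader.Docs, int) signature shown in this diff; the class name ColumnReadSketch and the helper name loadColumnOrNull are illustrative only.

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedDocValues;
import org.elasticsearch.index.mapper.BlockLoader;

import java.io.IOException;

final class ColumnReadSketch {

    private ColumnReadSketch() {}

    // Tries the optimized column-at-a-time read for a sorted doc-values field.
    // Returns the block on success, or null when the reader does not support it
    // (for example, when the field is not the primary index sort), in which case
    // the caller should fall back to per-document access.
    static BlockLoader.Block loadColumnOrNull(
        LeafReader leafReader,
        String field,
        BlockLoader.BlockFactory factory,
        BlockLoader.Docs docs,
        int offset
    ) throws IOException {
        SortedDocValues values = leafReader.getSortedDocValues(field);
        if (values instanceof BlockLoader.OptionalColumnAtATimeReader optional) {
            return optional.tryRead(factory, docs, offset); // may still return null
        }
        return null;
    }
}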