|
27 | 27 | import org.apache.lucene.index.LogByteSizeMergePolicy; |
28 | 28 | import org.apache.lucene.index.NumericDocValues; |
29 | 29 | import org.apache.lucene.index.SortedDocValues; |
| 30 | +import org.apache.lucene.search.DocIdSetIterator; |
30 | 31 | import org.apache.lucene.search.IndexSearcher; |
31 | 32 | import org.apache.lucene.search.Sort; |
32 | 33 | import org.apache.lucene.search.SortField; |
|
49 | 50 | import java.util.function.Supplier; |
50 | 51 | import java.util.stream.IntStream; |
51 | 52 |
|
| 53 | +import static org.elasticsearch.test.ESTestCase.randomFrom; |
| 54 | + |
52 | 55 | public class ES819TSDBDocValuesFormatTests extends ES87TSDBDocValuesFormatTests { |
53 | 56 |
|
54 | 57 | final Codec codec = TestUtil.alwaysDocValuesFormat(new ES819TSDBDocValuesFormat()); |
@@ -959,6 +962,134 @@ private static BulkNumericDocValues getBulkNumericDocValues(LeafReader leafReade |
959 | 962 | return (BulkNumericDocValues) DocValues.unwrapSingleton(leafReader.getSortedNumericDocValues(counterField)); |
960 | 963 | } |
961 | 964 |
|
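| | + // Indexes time series style documents with both dense and sparse doc values fields, force merges |
| | + // to a single segment, and verifies that every doc values iterator exposes contiguous docID runs |
| | + // through DocIdSetIterator#docIDRunEnd(). |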
| 965 | + public void testDocIDEndRun() throws IOException { |
| 966 | + String timestampField = "@timestamp"; |
| 967 | + String hostnameField = "host.name"; |
| 968 | + long baseTimestamp = 1704067200000L; |
| 969 | + |
| 970 | + var config = getTimeSeriesIndexWriterConfig(hostnameField, timestampField); |
| 971 | + try (var dir = newDirectory(); var iw = new IndexWriter(dir, config)) { |
| 972 | + long counter1 = 0; |
| 973 | + |
| 974 | + long[] gauge2Values = new long[] { -2, -4, -6, -8, -10, -12, -14, -16 }; |
| 975 | + String[] tags = new String[] { "tag_1", "tag_2", "tag_3", "tag_4", "tag_5", "tag_6", "tag_7", "tag_8" }; |
| 976 | + |
| 977 | + // IndexedDISI stores doc ids in blocks of 4096. To exercise docID run ends over both dense and |
| 978 | + // sparse blocks, the gap frequency must be larger than one block (4096) but smaller than two |
| 979 | + // blocks, and enough documents are indexed to span at least three blocks. |
| 980 | + int gapFrequency = 4500 + random().nextInt(2048); |
| 981 | + int numDocs = 10000 + random().nextInt(10000); |
| 982 | + int numHosts = numDocs / 20; |
| 983 | + |
| 984 | + for (int i = 0; i < numDocs; i++) { |
| 985 | + var d = new Document(); |
| 986 | + |
| 987 | + int batchIndex = i / numHosts; |
| 988 | + String hostName = String.format(Locale.ROOT, "host-%03d", batchIndex); |
| 989 | + long timestamp = baseTimestamp + (1000L * i); |
| 990 | + |
| 991 | + d.add(new SortedDocValuesField(hostnameField, new BytesRef(hostName))); |
| 992 | + // Index sorting doesn't work with NumericDocValuesField, so use SortedNumericDocValuesField: |
| 993 | + d.add(new SortedNumericDocValuesField(timestampField, timestamp)); |
| 994 | + d.add(new NumericDocValuesField("counter", counter1++)); |
| 995 | + if (i % gapFrequency != 0) { |
| 996 | + d.add(new NumericDocValuesField("sparse_counter", counter1)); |
| 997 | + } |
| 998 | + |
| 999 | + int numGauge2 = 1 + random().nextInt(8); |
| 1000 | + for (int j = 0; j < numGauge2; j++) { |
| 1001 | + d.add(new SortedNumericDocValuesField("gauge", gauge2Values[(i + j) % gauge2Values.length])); |
| 1002 | + if (i % gapFrequency != 0) { |
| 1003 | + d.add(new SortedNumericDocValuesField("sparse_gauge", gauge2Values[(i + j) % gauge2Values.length])); |
| 1004 | + } |
| 1005 | + } |
| 1006 | + |
| 1007 | + d.add(new SortedDocValuesField("tag", new BytesRef(randomFrom(tags)))); |
| 1008 | + if (i % gapFrequency != 0) { |
| 1009 | + d.add(new SortedDocValuesField("sparse_tag", new BytesRef(randomFrom(tags)))); |
| 1010 | + } |
| 1011 | + |
| 1012 | + int numTags = 1 + random().nextInt(8); |
| 1013 | + for (int j = 0; j < numTags; j++) { |
| 1014 | + d.add(new SortedSetDocValuesField("tags", new BytesRef(tags[(i + j) % tags.length]))); |
| 1015 | + if (i % gapFrequency != 0) { |
| 1016 | + d.add(new SortedSetDocValuesField("sparse_tags", new BytesRef(tags[(i + j) % tags.length]))); |
| 1017 | + } |
| 1018 | + } |
| 1019 | + |
| 1020 | + d.add(new BinaryDocValuesField("tags_as_bytes", new BytesRef(tags[i % tags.length]))); |
| 1021 | + if (i % gapFrequency != 0) { |
| 1022 | + d.add(new BinaryDocValuesField("sparse_tags_as_bytes", new BytesRef(tags[i % tags.length]))); |
| 1023 | + } |
| 1024 | + |
| 1025 | + iw.addDocument(d); |
| 1026 | + if (i % 100 == 0) { |
| 1027 | + iw.commit(); |
| 1028 | + } |
| 1029 | + } |
| 1030 | + iw.commit(); |
| 1031 | + |
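| | + // Force merge down to a single segment so each field is read through one doc values producer |
| | + // covering the full docID space of the index. |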
| 1032 | + iw.forceMerge(1); |
| 1033 | + |
| 1034 | + try (var reader = DirectoryReader.open(iw)) { |
| 1035 | + assertEquals(1, reader.leaves().size()); |
| 1036 | + assertEquals(numDocs, reader.maxDoc()); |
| 1037 | + var leaf = reader.leaves().get(0).reader(); |
| 1038 | + var hostNameDV = leaf.getSortedDocValues(hostnameField); |
| 1039 | + assertNotNull(hostNameDV); |
| 1040 | + validateRunEnd(hostNameDV); |
| 1041 | + var timestampDV = DocValues.unwrapSingleton(leaf.getSortedNumericDocValues(timestampField)); |
| 1042 | + assertNotNull(timestampDV); |
| 1043 | + validateRunEnd(timestampDV); |
| 1044 | + var counterOneDV = leaf.getNumericDocValues("counter"); |
| 1045 | + assertNotNull(counterOneDV); |
| 1046 | + validateRunEnd(counterOneDV); |
| 1047 | + var sparseCounter = leaf.getNumericDocValues("sparse_counter"); |
| 1048 | + assertNotNull(sparseCounter); |
| 1049 | + validateRunEnd(sparseCounter); |
| 1050 | + var gaugeOneDV = leaf.getSortedNumericDocValues("gauge"); |
| 1051 | + assertNotNull(gaugeOneDV); |
| 1052 | + validateRunEnd(gaugeOneDV); |
| 1053 | + var sparseGaugeDV = leaf.getSortedNumericDocValues("sparse_gauge"); |
| 1054 | + assertNotNull(sparseGaugeDV); |
| 1055 | + validateRunEnd(sparseGaugeDV); |
| 1056 | + var tagDV = leaf.getSortedDocValues("tag"); |
| 1057 | + assertNotNull(tagDV); |
| 1058 | + validateRunEnd(tagDV); |
| 1059 | + var sparseTagDV = leaf.getSortedDocValues("sparse_tag"); |
| 1060 | + assertNotNull(sparseTagDV); |
| 1061 | + validateRunEnd(sparseTagDV); |
| 1062 | + var tagsDV = leaf.getSortedSetDocValues("tags"); |
| 1063 | + assertNotNull(tagsDV); |
| 1064 | + validateRunEnd(tagsDV); |
| 1065 | + var sparseTagsDV = leaf.getSortedSetDocValues("sparse_tags"); |
| 1066 | + assertNotNull(sparseTagsDV); |
| 1067 | + validateRunEnd(sparseTagsDV); |
| 1068 | + var tagBytesDV = leaf.getBinaryDocValues("tags_as_bytes"); |
| 1069 | + assertNotNull(tagBytesDV); |
| 1070 | + validateRunEnd(tagBytesDV); |
| 1071 | + var sparseTagBytesDV = leaf.getBinaryDocValues("sparse_tags_as_bytes"); |
| 1072 | + assertNotNull(sparseTagBytesDV); |
| 1073 | + validateRunEnd(sparseTagBytesDV); |
| 1074 | + } |
| 1075 | + } |
| 1076 | + } |
| 1077 | + |
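| | + // Walks the iterator and, whenever docIDRunEnd() reports at least two more consecutive docs, |
| | + // advances through the run one doc at a time, asserting that every docID in the run is present. |
| | + // Fails if no such multi-doc run is found. |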
| 1078 | + private void validateRunEnd(DocIdSetIterator iterator) throws IOException { |
| 1079 | + int runCount = 0; |
| 1080 | + while (iterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { |
| 1081 | + int remainingInRun = iterator.docIDRunEnd() - iterator.docID() - 1; |
| 1082 | + if (remainingInRun > 1) { |
| 1083 | + runCount++; |
| 1084 | + for (int i = 0; i < remainingInRun; i++) { |
| 1085 | + int expected = iterator.docID() + 1; |
| 1086 | + assertEquals(expected, iterator.advance(expected)); |
| 1087 | + } |
| 1088 | + } |
| 1089 | + } |
| 1090 | + assertTrue("Expected to find docID runs spanning more than one document", runCount > 0); |
| 1091 | + } |
| 1092 | + |
962 | 1093 | private IndexWriterConfig getTimeSeriesIndexWriterConfig(String hostnameField, String timestampField) { |
963 | 1094 | var config = new IndexWriterConfig(); |
964 | 1095 | if (hostnameField != null) { |
|