|
11 | 11 | import org.apache.lucene.analysis.Analyzer;
|
12 | 12 | import org.apache.lucene.codecs.Codec;
|
13 | 13 | import org.apache.lucene.document.Document;
|
| 14 | +import org.apache.lucene.document.Field; |
14 | 15 | import org.apache.lucene.document.SortedDocValuesField;
|
| 16 | +import org.apache.lucene.document.SortedNumericDocValuesField; |
15 | 17 | import org.apache.lucene.document.SortedSetDocValuesField;
|
| 18 | +import org.apache.lucene.document.StringField; |
16 | 19 | import org.apache.lucene.index.DirectoryReader;
|
17 | 20 | import org.apache.lucene.index.IndexReader;
|
| 21 | +import org.apache.lucene.index.IndexWriter; |
18 | 22 | import org.apache.lucene.index.IndexWriterConfig;
|
| 23 | +import org.apache.lucene.index.LeafReaderContext; |
19 | 24 | import org.apache.lucene.index.SortedDocValues;
|
| 25 | +import org.apache.lucene.index.SortedNumericDocValues; |
20 | 26 | import org.apache.lucene.index.SortedSetDocValues;
|
| 27 | +import org.apache.lucene.index.StoredFields; |
21 | 28 | import org.apache.lucene.search.DocIdSetIterator;
|
22 | 29 | import org.apache.lucene.store.Directory;
|
23 | 30 | import org.apache.lucene.tests.analysis.MockAnalyzer;
|
|
27 | 34 | import org.apache.lucene.util.BytesRef;
|
28 | 35 |
|
29 | 36 | import java.io.IOException;
|
| 37 | +import java.util.ArrayList; |
| 38 | +import java.util.Arrays; |
| 39 | +import java.util.HashMap; |
| 40 | +import java.util.List; |
| 41 | +import java.util.Map; |
| 42 | + |
| 43 | +import static org.hamcrest.Matchers.equalTo; |
| 44 | +import static org.hamcrest.Matchers.greaterThanOrEqualTo; |
30 | 45 |
|
31 | 46 | public class ES87TSDBDocValuesFormatTests extends BaseDocValuesFormatTestCase {
|
32 | 47 |
|
@@ -116,4 +131,116 @@ public void testSortedSetDocValuesSingleUniqueValue() throws IOException {
|
116 | 131 | }
|
117 | 132 | }
|
118 | 133 |
|
| 134 | + public void testOneDocManyValues() throws Exception { |
| 135 | + IndexWriterConfig config = new IndexWriterConfig(); |
| 136 | + try (Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, config)) { |
| 137 | + int numValues = 128 + random().nextInt(1024); // > 2^7 to require two blocks |
| 138 | + Document d = new Document(); |
| 139 | + for (int i = 0; i < numValues; i++) { |
| 140 | + d.add(new SortedSetDocValuesField("dv", new BytesRef("v-" + i))); |
| 141 | + } |
| 142 | + writer.addDocument(d); |
| 143 | + try (DirectoryReader reader = DirectoryReader.open(writer)) { |
| 144 | + LeafReaderContext leaf = reader.leaves().get(0); |
| 145 | + SortedSetDocValues dv = leaf.reader().getSortedSetDocValues("dv"); |
| 146 | + for (int i = 0; i < 3; i++) { |
| 147 | + assertTrue(dv.advanceExact(0)); |
| 148 | + assertThat(dv.docValueCount(), equalTo(numValues)); |
| 149 | + for (int v = 0; v < dv.docValueCount(); v++) { |
| 150 | + assertThat(dv.nextOrd(), greaterThanOrEqualTo(0L)); |
| 151 | + } |
| 152 | + } |
| 153 | + } |
| 154 | + } |
| 155 | + } |
| 156 | + |
| 157 | + public void testManyDocsWithManyValues() throws Exception { |
| 158 | + final int numDocs = 10 + random().nextInt(20); |
| 159 | + final Map<String, List<String>> sortedSet = new HashMap<>(); // key -> doc-values |
| 160 | + final Map<String, long[]> sortedNumbers = new HashMap<>(); // key -> numbers |
| 161 | + try (Directory directory = newDirectory()) { |
| 162 | + IndexWriterConfig conf = newIndexWriterConfig(); |
| 163 | + try (RandomIndexWriter writer = new RandomIndexWriter(random(), directory, conf)) { |
| 164 | + for (int i = 0; i < numDocs; i++) { |
| 165 | + Document doc = new Document(); |
| 166 | + String key = "k-" + i; |
| 167 | + doc.add(new StringField("key", new BytesRef(key), Field.Store.YES)); |
| 168 | + int numValues = random().nextInt(600); |
| 169 | + List<String> binary = new ArrayList<>(); |
| 170 | + for (int v = 0; v < numValues; v++) { |
| 171 | + String dv = "v-" + random().nextInt(3) + ":" + v; |
| 172 | + binary.add(dv); |
| 173 | + doc.add(new SortedSetDocValuesField("binary", new BytesRef(dv))); |
| 174 | + } |
| 175 | + sortedSet.put(key, binary.stream().sorted().toList()); |
| 176 | + numValues = random().nextInt(600); |
| 177 | + long[] numbers = new long[numValues]; |
| 178 | + for (int v = 0; v < numValues; v++) { |
| 179 | + numbers[v] = random().nextInt(10) * 1000 + v; |
| 180 | + doc.add(new SortedNumericDocValuesField("numbers", numbers[v])); |
| 181 | + } |
| 182 | + Arrays.sort(numbers); |
| 183 | + sortedNumbers.put(key, numbers); |
| 184 | + writer.addDocument(doc); |
| 185 | + } |
| 186 | + writer.commit(); |
| 187 | + } |
| 188 | + try (IndexReader reader = maybeWrapWithMergingReader(DirectoryReader.open(directory))) { |
| 189 | + for (LeafReaderContext leaf : reader.leaves()) { |
| 190 | + StoredFields storedFields = leaf.reader().storedFields(); |
| 191 | + int iters = 1 + random().nextInt(5); |
| 192 | + for (int i = 0; i < iters; i++) { |
| 193 | + // check with binary |
| 194 | + SortedSetDocValues binaryDV = leaf.reader().getSortedSetDocValues("binary"); |
| 195 | + int doc = random().nextInt(leaf.reader().maxDoc()); |
| 196 | + while ((doc = binaryDV.advance(doc)) != DocIdSetIterator.NO_MORE_DOCS) { |
| 197 | + String key = storedFields.document(doc).getBinaryValue("key").utf8ToString(); |
| 198 | + List<String> expected = sortedSet.get(key); |
| 199 | + List<String> actual = new ArrayList<>(); |
| 200 | + for (int v = 0; v < binaryDV.docValueCount(); v++) { |
| 201 | + long ord = binaryDV.nextOrd(); |
| 202 | + actual.add(binaryDV.lookupOrd(ord).utf8ToString()); |
| 203 | + } |
| 204 | + assertEquals(expected, actual); |
| 205 | + int repeats = random().nextInt(3); |
| 206 | + for (int r = 0; r < repeats; r++) { |
| 207 | + assertTrue(binaryDV.advanceExact(doc)); |
| 208 | + actual.clear(); |
| 209 | + for (int v = 0; v < binaryDV.docValueCount(); v++) { |
| 210 | + long ord = binaryDV.nextOrd(); |
| 211 | + actual.add(binaryDV.lookupOrd(ord).utf8ToString()); |
| 212 | + } |
| 213 | + assertEquals(expected, actual); |
| 214 | + } |
| 215 | + doc++; |
| 216 | + doc += random().nextInt(3); |
| 217 | + } |
| 218 | + // check with numbers |
| 219 | + doc = random().nextInt(leaf.reader().maxDoc()); |
| 220 | + SortedNumericDocValues numbersDV = leaf.reader().getSortedNumericDocValues("numbers"); |
| 221 | + while ((doc = numbersDV.advance(doc)) != DocIdSetIterator.NO_MORE_DOCS) { |
| 222 | + String key = storedFields.document(doc).getBinaryValue("key").utf8ToString(); |
| 223 | + long[] expected = sortedNumbers.get(key); |
| 224 | + long[] actual = new long[expected.length]; |
| 225 | + for (int v = 0; v < numbersDV.docValueCount(); v++) { |
| 226 | + actual[v] = numbersDV.nextValue(); |
| 227 | + } |
| 228 | + assertArrayEquals(expected, actual); |
| 229 | + int repeats = random().nextInt(3); |
| 230 | + for (int r = 0; r < repeats; r++) { |
| 231 | + assertTrue(numbersDV.advanceExact(doc)); |
| 232 | + actual = new long[expected.length]; |
| 233 | + for (int v = 0; v < numbersDV.docValueCount(); v++) { |
| 234 | + actual[v] = numbersDV.nextValue(); |
| 235 | + } |
| 236 | + assertArrayEquals(expected, actual); |
| 237 | + } |
| 238 | + doc++; |
| 239 | + doc += random().nextInt(3); |
| 240 | + } |
| 241 | + } |
| 242 | + } |
| 243 | + } |
| 244 | + } |
| 245 | + } |
119 | 246 | }
|
0 commit comments