|
11 | 11 |
|
12 | 12 | import org.apache.lucene.codecs.Codec;
|
13 | 13 | import org.apache.lucene.codecs.DocValuesFormat;
|
| 14 | +import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat; |
14 | 15 | import org.apache.lucene.document.BinaryDocValuesField;
|
15 | 16 | import org.apache.lucene.document.Document;
|
16 | 17 | import org.apache.lucene.document.NumericDocValuesField;
|
|
21 | 22 | import org.apache.lucene.index.DocValues;
|
22 | 23 | import org.apache.lucene.index.IndexWriter;
|
23 | 24 | import org.apache.lucene.index.IndexWriterConfig;
|
| 25 | +import org.apache.lucene.index.IndexableField; |
| 26 | +import org.apache.lucene.index.LeafReader; |
24 | 27 | import org.apache.lucene.index.LogByteSizeMergePolicy;
|
| 28 | +import org.apache.lucene.index.NumericDocValues; |
| 29 | +import org.apache.lucene.index.SortedDocValues; |
25 | 30 | import org.apache.lucene.search.Sort;
|
26 | 31 | import org.apache.lucene.search.SortField;
|
27 | 32 | import org.apache.lucene.search.SortedNumericSortField;
|
28 | 33 | import org.apache.lucene.util.BytesRef;
|
29 | 34 | import org.elasticsearch.cluster.metadata.DataStream;
|
| 35 | +import org.elasticsearch.common.Randomness; |
| 36 | +import org.elasticsearch.common.util.CollectionUtils; |
30 | 37 | import org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec;
|
31 | 38 | import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests;
|
| 39 | +import org.elasticsearch.test.ESTestCase; |
32 | 40 |
|
| 41 | +import java.io.IOException; |
| 42 | +import java.util.ArrayList; |
33 | 43 | import java.util.Arrays;
|
| 44 | +import java.util.List; |
34 | 45 | import java.util.Locale;
|
| 46 | +import java.util.function.Supplier; |
| 47 | +import java.util.stream.IntStream; |
35 | 48 |
|
36 | 49 | public class ES819TSDBDocValuesFormatTests extends ES87TSDBDocValuesFormatTests {
|
37 | 50 |
|
@@ -514,6 +527,184 @@ public void testWithNoValueMultiValue() throws Exception {
|
514 | 527 | }
|
515 | 528 | }
|
516 | 529 |
|
| 530 | + public void testAddIndices() throws IOException { |
| 531 | + String timestampField = "@timestamp"; |
| 532 | + String hostnameField = "host.name"; |
| 533 | + Supplier<IndexWriterConfig> indexConfigWithRandomDVFormat = () -> { |
| 534 | + IndexWriterConfig config = getTimeSeriesIndexWriterConfig(hostnameField, timestampField); |
| 535 | + DocValuesFormat dvFormat = switch (random().nextInt(3)) { |
| 536 | + case 0 -> new ES87TSDBDocValuesFormatTests.TestES87TSDBDocValuesFormat(random().nextInt(4, 16)); |
| 537 | + case 1 -> new ES819TSDBDocValuesFormat(); |
| 538 | + case 2 -> new Lucene90DocValuesFormat(); |
| 539 | + default -> throw new AssertionError("unknown option"); |
| 540 | + }; |
| 541 | + config.setCodec(new Elasticsearch900Lucene101Codec() { |
| 542 | + @Override |
| 543 | + public DocValuesFormat getDocValuesFormatForField(String field) { |
| 544 | + return dvFormat; |
| 545 | + } |
| 546 | + }); |
| 547 | + return config; |
| 548 | + }; |
| 549 | + var allNumericFields = IntStream.range(0, ESTestCase.between(1, 10)).mapToObj(n -> "numeric_" + n).toList(); |
| 550 | + var allSortedNumericFields = IntStream.range(0, ESTestCase.between(1, 10)).mapToObj(n -> "sorted_numeric_" + n).toList(); |
| 551 | + var allSortedFields = IntStream.range(0, ESTestCase.between(1, 10)).mapToObj(n -> "sorted_" + n).toList(); |
| 552 | + var allSortedSetFields = IntStream.range(0, ESTestCase.between(1, 10)).mapToObj(n -> "sorted_set" + n).toList(); |
| 553 | + var allBinaryFields = IntStream.range(0, ESTestCase.between(1, 10)).mapToObj(n -> "binary_" + n).toList(); |
| 554 | + try (var source1 = newDirectory(); var source2 = newDirectory(); var singleDir = newDirectory(); var mergeDir = newDirectory()) { |
| 555 | + try ( |
| 556 | + var writer1 = new IndexWriter(source1, indexConfigWithRandomDVFormat.get()); |
| 557 | + var writer2 = new IndexWriter(source2, indexConfigWithRandomDVFormat.get()); |
| 558 | + var singleWriter = new IndexWriter(singleDir, indexConfigWithRandomDVFormat.get()) |
| 559 | + ) { |
| 560 | + int numDocs = 1 + random().nextInt(1_000); |
| 561 | + long timestamp = random().nextLong(1000_000L); |
| 562 | + for (int i = 0; i < numDocs; i++) { |
| 563 | + List<IndexableField> fields = new ArrayList<>(); |
| 564 | + String hostName = String.format(Locale.ROOT, "host-%d", random().nextInt(5)); |
| 565 | + timestamp += 1 + random().nextInt(1_000); |
| 566 | + fields.add(new SortedDocValuesField(hostnameField, new BytesRef(hostName))); |
| 567 | + fields.add(new SortedNumericDocValuesField(timestampField, timestamp)); |
| 568 | + var numericFields = ESTestCase.randomSubsetOf(allNumericFields); |
| 569 | + for (String f : numericFields) { |
| 570 | + fields.add(new NumericDocValuesField(f, random().nextLong(1000L))); |
| 571 | + } |
| 572 | + var sortedNumericFields = ESTestCase.randomSubsetOf(allSortedNumericFields); |
| 573 | + for (String field : sortedNumericFields) { |
| 574 | + int valueCount = 1 + random().nextInt(3); |
| 575 | + for (int v = 0; v < valueCount; v++) { |
| 576 | + fields.add(new SortedNumericDocValuesField(field, random().nextLong(1000L))); |
| 577 | + } |
| 578 | + } |
| 579 | + var sortedFields = ESTestCase.randomSubsetOf(allSortedFields); |
| 580 | + for (String field : sortedFields) { |
| 581 | + fields.add(new SortedDocValuesField(field, new BytesRef("s" + random().nextInt(100)))); |
| 582 | + } |
| 583 | + var sortedSetFields = ESTestCase.randomSubsetOf(allSortedSetFields); |
| 584 | + for (String field : sortedSetFields) { |
| 585 | + int valueCount = 1 + random().nextInt(3); |
| 586 | + for (int v = 0; v < valueCount; v++) { |
| 587 | + fields.add(new SortedSetDocValuesField(field, new BytesRef("ss" + random().nextInt(100)))); |
| 588 | + } |
| 589 | + } |
| 590 | + List<String> binaryFields = ESTestCase.randomSubsetOf(allBinaryFields); |
| 591 | + for (String field : binaryFields) { |
| 592 | + fields.add(new BinaryDocValuesField(field, new BytesRef("b" + random().nextInt(100)))); |
| 593 | + } |
| 594 | + for (IndexWriter writer : List.of(ESTestCase.randomFrom(writer1, writer2), singleWriter)) { |
| 595 | + Randomness.shuffle(fields); |
| 596 | + writer.addDocument(fields); |
| 597 | + if (random().nextInt(100) <= 5) { |
| 598 | + writer.commit(); |
| 599 | + } |
| 600 | + } |
| 601 | + } |
| 602 | + if (random().nextBoolean()) { |
| 603 | + writer1.forceMerge(1); |
| 604 | + } |
| 605 | + if (random().nextBoolean()) { |
| 606 | + writer2.forceMerge(1); |
| 607 | + } |
| 608 | + singleWriter.commit(); |
| 609 | + singleWriter.forceMerge(1); |
| 610 | + } |
| 611 | + try (var mergeWriter = new IndexWriter(mergeDir, getTimeSeriesIndexWriterConfig(hostnameField, timestampField))) { |
| 612 | + mergeWriter.addIndexes(source1, source2); |
| 613 | + mergeWriter.forceMerge(1); |
| 614 | + } |
| 615 | + try (var reader1 = DirectoryReader.open(singleDir); var reader2 = DirectoryReader.open(mergeDir)) { |
| 616 | + assertEquals(reader1.maxDoc(), reader2.maxDoc()); |
| 617 | + assertEquals(1, reader1.leaves().size()); |
| 618 | + assertEquals(1, reader2.leaves().size()); |
| 619 | + for (int i = 0; i < reader1.leaves().size(); i++) { |
| 620 | + LeafReader leaf1 = reader1.leaves().get(i).reader(); |
| 621 | + LeafReader leaf2 = reader2.leaves().get(i).reader(); |
| 622 | + for (String f : CollectionUtils.appendToCopy(allSortedNumericFields, timestampField)) { |
| 623 | + var dv1 = leaf1.getNumericDocValues(f); |
| 624 | + var dv2 = leaf2.getNumericDocValues(f); |
| 625 | + if (dv1 == null) { |
| 626 | + assertNull(dv2); |
| 627 | + continue; |
| 628 | + } |
| 629 | + assertNotNull(dv2); |
| 630 | + while (dv1.nextDoc() != NumericDocValues.NO_MORE_DOCS) { |
| 631 | + assertNotEquals(NumericDocValues.NO_MORE_DOCS, dv2.nextDoc()); |
| 632 | + assertEquals(dv1.docID(), dv2.docID()); |
| 633 | + assertEquals(dv1.longValue(), dv2.longValue()); |
| 634 | + } |
| 635 | + assertEquals(NumericDocValues.NO_MORE_DOCS, dv2.nextDoc()); |
| 636 | + } |
| 637 | + for (String f : CollectionUtils.appendToCopy(allSortedNumericFields, timestampField)) { |
| 638 | + var dv1 = leaf1.getSortedNumericDocValues(f); |
| 639 | + var dv2 = leaf2.getSortedNumericDocValues(f); |
| 640 | + if (dv1 == null) { |
| 641 | + assertNull(dv2); |
| 642 | + continue; |
| 643 | + } |
| 644 | + assertNotNull(dv2); |
| 645 | + while (dv1.nextDoc() != NumericDocValues.NO_MORE_DOCS) { |
| 646 | + assertNotEquals(NumericDocValues.NO_MORE_DOCS, dv2.nextDoc()); |
| 647 | + assertEquals(dv1.docID(), dv2.docID()); |
| 648 | + assertEquals(dv1.docValueCount(), dv2.docValueCount()); |
| 649 | + for (int v = 0; v < dv1.docValueCount(); v++) { |
| 650 | + assertEquals(dv1.nextValue(), dv2.nextValue()); |
| 651 | + } |
| 652 | + } |
| 653 | + assertEquals(NumericDocValues.NO_MORE_DOCS, dv2.nextDoc()); |
| 654 | + } |
| 655 | + for (String f : CollectionUtils.appendToCopy(allSortedFields, hostnameField)) { |
| 656 | + var dv1 = leaf1.getSortedDocValues(f); |
| 657 | + var dv2 = leaf2.getSortedDocValues(f); |
| 658 | + if (dv1 == null) { |
| 659 | + assertNull(dv2); |
| 660 | + continue; |
| 661 | + } |
| 662 | + assertNotNull(dv2); |
| 663 | + while (dv1.nextDoc() != SortedDocValues.NO_MORE_DOCS) { |
| 664 | + assertNotEquals(SortedDocValues.NO_MORE_DOCS, dv2.nextDoc()); |
| 665 | + assertEquals(dv1.docID(), dv2.docID()); |
| 666 | + assertEquals(dv1.lookupOrd(dv1.ordValue()), dv2.lookupOrd(dv2.ordValue())); |
| 667 | + } |
| 668 | + assertEquals(NumericDocValues.NO_MORE_DOCS, dv2.nextDoc()); |
| 669 | + } |
| 670 | + for (String f : allSortedSetFields) { |
| 671 | + var dv1 = leaf1.getSortedSetDocValues(f); |
| 672 | + var dv2 = leaf2.getSortedSetDocValues(f); |
| 673 | + if (dv1 == null) { |
| 674 | + assertNull(dv2); |
| 675 | + continue; |
| 676 | + } |
| 677 | + assertNotNull(dv2); |
| 678 | + while (dv1.nextDoc() != SortedDocValues.NO_MORE_DOCS) { |
| 679 | + assertNotEquals(SortedDocValues.NO_MORE_DOCS, dv2.nextDoc()); |
| 680 | + assertEquals(dv1.docID(), dv2.docID()); |
| 681 | + assertEquals(dv1.docValueCount(), dv2.docValueCount()); |
| 682 | + for (int v = 0; v < dv1.docValueCount(); v++) { |
| 683 | + assertEquals(dv1.lookupOrd(dv1.nextOrd()), dv2.lookupOrd(dv2.nextOrd())); |
| 684 | + } |
| 685 | + } |
| 686 | + assertEquals(NumericDocValues.NO_MORE_DOCS, dv2.nextDoc()); |
| 687 | + } |
| 688 | + for (String f : allBinaryFields) { |
| 689 | + var dv1 = leaf1.getBinaryDocValues(f); |
| 690 | + var dv2 = leaf2.getBinaryDocValues(f); |
| 691 | + if (dv1 == null) { |
| 692 | + assertNull(dv2); |
| 693 | + continue; |
| 694 | + } |
| 695 | + assertNotNull(dv2); |
| 696 | + while (dv1.nextDoc() != SortedDocValues.NO_MORE_DOCS) { |
| 697 | + assertNotEquals(SortedDocValues.NO_MORE_DOCS, dv2.nextDoc()); |
| 698 | + assertEquals(dv1.docID(), dv2.docID()); |
| 699 | + assertEquals(dv1.binaryValue(), dv2.binaryValue()); |
| 700 | + } |
| 701 | + assertEquals(NumericDocValues.NO_MORE_DOCS, dv2.nextDoc()); |
| 702 | + } |
| 703 | + } |
| 704 | + } |
| 705 | + } |
| 706 | + } |
| 707 | + |
517 | 708 | private IndexWriterConfig getTimeSeriesIndexWriterConfig(String hostnameField, String timestampField) {
|
518 | 709 | var config = new IndexWriterConfig();
|
519 | 710 | config.setIndexSort(
|
|
0 commit comments