|
27 | 27 | import org.apache.lucene.index.LogByteSizeMergePolicy; |
28 | 28 | import org.apache.lucene.index.NumericDocValues; |
29 | 29 | import org.apache.lucene.index.SortedDocValues; |
| 30 | +import org.apache.lucene.search.DocIdSetIterator; |
30 | 31 | import org.apache.lucene.search.Sort; |
31 | 32 | import org.apache.lucene.search.SortField; |
32 | 33 | import org.apache.lucene.search.SortedNumericSortField; |
|
47 | 48 | import java.util.function.Supplier; |
48 | 49 | import java.util.stream.IntStream; |
49 | 50 |
|
| 51 | +import static org.elasticsearch.test.ESTestCase.randomFrom; |
| 52 | + |
50 | 53 | public class ES819TSDBDocValuesFormatTests extends ES87TSDBDocValuesFormatTests { |
51 | 54 |
|
52 | 55 | final Codec codec = TestUtil.alwaysDocValuesFormat(new ES819TSDBDocValuesFormat()); |
@@ -698,6 +701,135 @@ public DocValuesFormat getDocValuesFormatForField(String field) { |
698 | 701 | } |
699 | 702 | } |
700 | 703 |
|
| 704 | + public void testDocIDEndRun() throws IOException { |
| 705 | + String timestampField = "@timestamp"; |
| 706 | + String hostnameField = "host.name"; |
| 707 | + long baseTimestamp = 1704067200000L; |
| 708 | + |
| 709 | + var config = getTimeSeriesIndexWriterConfig(hostnameField, timestampField); |
| 710 | + try (var dir = newDirectory(); var iw = new IndexWriter(dir, config)) { |
| 711 | + long counter1 = 0; |
| 712 | + |
| 713 | + |
| 714 | + long[] gauge2Values = new long[]{-2, -4, -6, -8, -10, -12, -14, -16}; |
| 715 | + String[] tags = new String[]{"tag_1", "tag_2", "tag_3", "tag_4", "tag_5", "tag_6", "tag_7", "tag_8"}; |
| 716 | + |
| 717 | + // IndexedDISI stores doc ids in blocks of 4096. To test sparse end runs, we want a mixture of |
| 718 | + // dense and sparse blocks, so the gap frequency needs to be larger than one block (4096) |
| 719 | + // but smaller than two blocks, and we need to index at least three blocks' worth of documents. |
| 720 | + int gapFrequency = 4500 + random().nextInt(2048); |
| 721 | + int numDocs = 10000 + random().nextInt(10000); |
| 722 | + int numHosts = numDocs / 20; |
| 723 | + |
| 724 | + for (int i = 0; i < numDocs; i++) { |
| 725 | + var d = new Document(); |
| 726 | + |
| 727 | + int batchIndex = i / numHosts; |
| 728 | + String hostName = String.format(Locale.ROOT, "host-%03d", batchIndex); |
| 729 | + long timestamp = baseTimestamp + (1000L * i); |
| 730 | + |
| 731 | + d.add(new SortedDocValuesField(hostnameField, new BytesRef(hostName))); |
| 732 | + // Index sorting doesn't work with NumericDocValuesField: |
| 733 | + d.add(new SortedNumericDocValuesField(timestampField, timestamp)); |
| 734 | + d.add(new NumericDocValuesField("counter", counter1++)); |
| 735 | + if (i % gapFrequency != 0) { |
| 736 | + d.add(new NumericDocValuesField("sparse_counter", counter1)); |
| 737 | + } |
| 738 | + |
| 739 | + int numGauge2 = 1 + random().nextInt(8); |
| 740 | + for (int j = 0; j < numGauge2; j++) { |
| 741 | + d.add(new SortedNumericDocValuesField("gauge", gauge2Values[(i + j) % gauge2Values.length])); |
| 742 | + if (i % gapFrequency != 0) { |
| 743 | + d.add(new SortedNumericDocValuesField("sparse_gauge", gauge2Values[(i + j) % gauge2Values.length])); |
| 744 | + } |
| 745 | + } |
| 746 | + |
| 747 | + d.add(new SortedDocValuesField("tag", new BytesRef(randomFrom(tags)))); |
| 748 | + if (i % gapFrequency != 0) { |
| 749 | + d.add(new SortedDocValuesField("sparse_tag", new BytesRef(randomFrom(tags)))); |
| 750 | + } |
| 751 | + |
| 752 | + int numTags = 1 + random().nextInt(8); |
| 753 | + for (int j = 0; j < numTags; j++) { |
| 754 | + d.add(new SortedSetDocValuesField("tags", new BytesRef(tags[(i + j) % tags.length]))); |
| 755 | + if (i % gapFrequency != 0) { |
| 756 | + d.add(new SortedSetDocValuesField("sparse_tags", new BytesRef(tags[(i + j) % tags.length]))); |
| 757 | + } |
| 758 | + } |
| 759 | + |
| 760 | + d.add(new BinaryDocValuesField("tags_as_bytes", new BytesRef(tags[i % tags.length]))); |
| 761 | + if (i % gapFrequency != 0) { |
| 762 | + d.add(new BinaryDocValuesField("sparse_tags_as_bytes", new BytesRef(tags[i % tags.length]))); |
| 763 | + } |
| 764 | + |
| 765 | + iw.addDocument(d); |
| 766 | + if (i % 100 == 0) { |
| 767 | + iw.commit(); |
| 768 | + } |
| 769 | + } |
| 770 | + iw.commit(); |
| 771 | + |
| 772 | + iw.forceMerge(1); |
| 773 | + |
| 774 | + try (var reader = DirectoryReader.open(iw)) { |
| 775 | + assertEquals(1, reader.leaves().size()); |
| 776 | + assertEquals(numDocs, reader.maxDoc()); |
| 777 | + var leaf = reader.leaves().get(0).reader(); |
| 778 | + var hostNameDV = leaf.getSortedDocValues(hostnameField); |
| 779 | + assertNotNull(hostNameDV); |
| 780 | + validateRunEnd(hostNameDV); |
| 781 | + var timestampDV = DocValues.unwrapSingleton(leaf.getSortedNumericDocValues(timestampField)); |
| 782 | + assertNotNull(timestampDV); |
| 783 | + validateRunEnd(timestampDV); |
| 784 | + var counterOneDV = leaf.getNumericDocValues("counter"); |
| 785 | + assertNotNull(counterOneDV); |
| 786 | + validateRunEnd(counterOneDV); |
| 787 | + var sparseCounter = leaf.getNumericDocValues("sparse_counter"); |
| 788 | + assertNotNull(sparseCounter); |
| 789 | + validateRunEnd(sparseCounter); |
| 790 | + var gaugeOneDV = leaf.getSortedNumericDocValues("gauge"); |
| 791 | + assertNotNull(gaugeOneDV); |
| 792 | + validateRunEnd(gaugeOneDV); |
| 793 | + var sparseGaugeDV = leaf.getSortedNumericDocValues("sparse_gauge"); |
| 794 | + assertNotNull(sparseGaugeDV); |
| 795 | + validateRunEnd(sparseGaugeDV); |
| 796 | + var tagDV = leaf.getSortedDocValues("tag"); |
| 797 | + assertNotNull(tagDV); |
| 798 | + validateRunEnd(tagDV); |
| 799 | + var sparseTagDV = leaf.getSortedDocValues("sparse_tag"); |
| 800 | + assertNotNull(sparseTagDV); |
| 801 | + validateRunEnd(sparseTagDV); |
| 802 | + var tagsDV = leaf.getSortedSetDocValues("tags"); |
| 803 | + assertNotNull(tagsDV); |
| 804 | + validateRunEnd(tagsDV); |
| 805 | + var sparseTagsDV = leaf.getSortedSetDocValues("sparse_tags"); |
| 806 | + assertNotNull(sparseTagsDV); |
| 807 | + validateRunEnd(sparseTagsDV); |
| 808 | + var tagBytesDV = leaf.getBinaryDocValues("tags_as_bytes"); |
| 809 | + assertNotNull(tagBytesDV); |
| 810 | + validateRunEnd(tagBytesDV); |
| 811 | + var sparseTagBytesDV = leaf.getBinaryDocValues("sparse_tags_as_bytes"); |
| 812 | + assertNotNull(sparseTagBytesDV); |
| 813 | + validateRunEnd(sparseTagBytesDV); |
| 814 | + } |
| 815 | + } |
| 816 | + } |
| 817 | + |
| 818 | + private void validateRunEnd(DocIdSetIterator iterator) throws IOException { |
| 819 | + int runCount = 0; |
| 820 | + while (iterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { |
| 821 | + int runLength = iterator.docIDRunEnd() - iterator.docID() - 1; // consecutive docs expected after the current one |
| 822 | + if (runLength > 1) { |
| 823 | + runCount++; |
| 824 | + for (int i = 0; i < runLength; i++) { |
| 825 | + int expected = iterator.docID() + 1; |
| 826 | + assertEquals(expected, iterator.advance(expected)); |
| 827 | + } |
| 828 | + } |
| 829 | + } |
| 830 | + assertTrue("Expected at least one doc ID run longer than 1", runCount > 0); |
| 831 | + } |
| 832 | + |
701 | 833 | private IndexWriterConfig getTimeSeriesIndexWriterConfig(String hostnameField, String timestampField) { |
702 | 834 | var config = new IndexWriterConfig(); |
703 | 835 | config.setIndexSort( |
|
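Context note: the runs checked by validateRunEnd come from DocIdSetIterator.docIDRunEnd(), which the test treats as an exclusive bound: every doc ID in [docID(), docIDRunEnd()) is expected to be present, which is why the helper repeatedly advances to docID() + 1 and asserts each advance lands exactly there. Below is a minimal sketch (illustrative only, not code from this change; the RunAwareSum class and sumCounter method are made-up names) of how a reader of these doc values could consume such runs:

    import java.io.IOException;

    import org.apache.lucene.index.NumericDocValues;
    import org.apache.lucene.search.DocIdSetIterator;

    final class RunAwareSum {
        // Sums all values of a numeric doc-values field, reading whole runs of
        // consecutive documents between calls to docIDRunEnd().
        static long sumCounter(NumericDocValues values) throws IOException {
            long sum = 0;
            int doc = values.nextDoc();
            while (doc != DocIdSetIterator.NO_MORE_DOCS) {
                int runEnd = values.docIDRunEnd(); // exclusive end of the current run
                while (doc < runEnd) {
                    sum += values.longValue();     // iterator is positioned on `doc`
                    doc = values.nextDoc();        // every doc before runEnd is present, so this stays within the run until it ends
                }
                // doc is now the first doc after the run, or NO_MORE_DOCS (Integer.MAX_VALUE);
                // either way it is >= runEnd, so the inner loop exits cleanly.
            }
            return sum;
        }
    }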