2727import org .apache .lucene .index .LogByteSizeMergePolicy ;
2828import org .apache .lucene .index .NumericDocValues ;
2929import org .apache .lucene .index .SortedDocValues ;
30+ import org .apache .lucene .search .DocIdSetIterator ;
3031import org .apache .lucene .search .IndexSearcher ;
3132import org .apache .lucene .search .Sort ;
3233import org .apache .lucene .search .SortField ;
4950import java .util .function .Supplier ;
5051import java .util .stream .IntStream ;
5152
53+ import static org .elasticsearch .test .ESTestCase .randomFrom ;
54+
5255public class ES819TSDBDocValuesFormatTests extends ES87TSDBDocValuesFormatTests {
5356
5457 final Codec codec = TestUtil .alwaysDocValuesFormat (new ES819TSDBDocValuesFormat ());
@@ -67,9 +70,9 @@ public void testForceMergeDenseCase() throws Exception {
6770 try (var dir = newDirectory (); var iw = new IndexWriter (dir , config )) {
6871 long counter1 = 0 ;
6972 long counter2 = 10_000_000 ;
70- long [] gauge1Values = new long [] { 2 , 4 , 6 , 8 , 10 , 12 , 14 , 16 };
71- long [] gauge2Values = new long [] { -2 , -4 , -6 , -8 , -10 , -12 , -14 , -16 };
72- String [] tags = new String [] { "tag_1" , "tag_2" , "tag_3" , "tag_4" , "tag_5" , "tag_6" , "tag_7" , "tag_8" };
73+ long [] gauge1Values = new long []{ 2 , 4 , 6 , 8 , 10 , 12 , 14 , 16 };
74+ long [] gauge2Values = new long []{ -2 , -4 , -6 , -8 , -10 , -12 , -14 , -16 };
75+ String [] tags = new String []{ "tag_1" , "tag_2" , "tag_3" , "tag_4" , "tag_5" , "tag_6" , "tag_7" , "tag_8" };
7376
7477 int numDocs = 256 + random ().nextInt (1024 );
7578 int numHosts = numDocs / 20 ;
@@ -290,9 +293,9 @@ public void testForceMergeSparseCase() throws Exception {
290293 try (var dir = newDirectory (); var iw = new IndexWriter (dir , config )) {
291294 long counter1 = 0 ;
292295 long counter2 = 10_000_000 ;
293- long [] gauge1Values = new long [] { 2 , 4 , 6 , 8 , 10 , 12 , 14 , 16 };
294- long [] gauge2Values = new long [] { -2 , -4 , -6 , -8 , -10 , -12 , -14 , -16 };
295- String [] tags = new String [] { "tag_1" , "tag_2" , "tag_3" , "tag_4" , "tag_5" , "tag_6" , "tag_7" , "tag_8" };
296+ long [] gauge1Values = new long []{ 2 , 4 , 6 , 8 , 10 , 12 , 14 , 16 };
297+ long [] gauge2Values = new long []{ -2 , -4 , -6 , -8 , -10 , -12 , -14 , -16 };
298+ String [] tags = new String []{ "tag_1" , "tag_2" , "tag_3" , "tag_4" , "tag_5" , "tag_6" , "tag_7" , "tag_8" };
296299
297300 int numDocs = 256 + random ().nextInt (1024 );
298301 int numHosts = numDocs / 20 ;
@@ -442,8 +445,8 @@ public void testWithNoValueMultiValue() throws Exception {
442445
443446 var config = getTimeSeriesIndexWriterConfig (hostnameField , timestampField );
444447 try (var dir = newDirectory (); var iw = new IndexWriter (dir , config )) {
445- long [] gauge1Values = new long [] { 2 , 4 , 6 , 8 , 10 , 12 , 14 , 16 };
446- String [] tags = new String [] { "tag_1" , "tag_2" , "tag_3" , "tag_4" , "tag_5" , "tag_6" , "tag_7" , "tag_8" };
448+ long [] gauge1Values = new long []{ 2 , 4 , 6 , 8 , 10 , 12 , 14 , 16 };
449+ String [] tags = new String []{ "tag_1" , "tag_2" , "tag_3" , "tag_4" , "tag_5" , "tag_6" , "tag_7" , "tag_8" };
447450 {
448451 long timestamp = baseTimestamp ;
449452 for (int i = 0 ; i < numRounds ; i ++) {
@@ -709,7 +712,7 @@ public void testBulkLoading() throws Exception {
709712
710713 var config = getTimeSeriesIndexWriterConfig (null , timestampField );
711714 try (var dir = newDirectory (); var iw = new IndexWriter (dir , config )) {
712- long [] gauge1Values = new long [] { 2 , 4 , 6 , 8 , 10 , 12 , 14 , 16 };
715+ long [] gauge1Values = new long []{ 2 , 4 , 6 , 8 , 10 , 12 , 14 , 16 };
713716 int numDocs = 256 + random ().nextInt (8096 );
714717
715718 for (int i = 0 ; i < numDocs ; i ++) {
@@ -740,7 +743,7 @@ public void testBulkLoading() throws Exception {
740743 var counterDV = getBulkNumericDocValues (leaf .reader (), counterField );
741744 var gaugeDV = getBulkNumericDocValues (leaf .reader (), gaugeField );
742745 int maxDoc = leaf .reader ().maxDoc ();
743- for (int i = 0 ; i < maxDoc ;) {
746+ for (int i = 0 ; i < maxDoc ; ) {
744747 int size = Math .max (1 , random ().nextInt (0 , maxDoc - i ));
745748 var docs = TestBlock .docs (IntStream .range (i , i + size ).toArray ());
746749
@@ -959,6 +962,135 @@ private static BulkNumericDocValues getBulkNumericDocValues(LeafReader leafReade
959962 return (BulkNumericDocValues ) DocValues .unwrapSingleton (leafReader .getSortedNumericDocValues (counterField ));
960963 }
961964
965+ public void testDocIDEndRun () throws IOException {
966+ String timestampField = "@timestamp" ;
967+ String hostnameField = "host.name" ;
968+ long baseTimestamp = 1704067200000L ;
969+
970+ var config = getTimeSeriesIndexWriterConfig (hostnameField , timestampField );
971+ try (var dir = newDirectory (); var iw = new IndexWriter (dir , config )) {
972+ long counter1 = 0 ;
973+
974+
975+ long [] gauge2Values = new long []{-2 , -4 , -6 , -8 , -10 , -12 , -14 , -16 };
976+ String [] tags = new String []{"tag_1" , "tag_2" , "tag_3" , "tag_4" , "tag_5" , "tag_6" , "tag_7" , "tag_8" };
977+
978+ // IndexedDISI stores ids in blocks of 4096. To test sparse end runs, we want a mixture of
979+ // dense and sparse blocks, so we need the gap frequency to be larger than
980+ // this value, but smaller than two blocks, and to index at least three blocks
981+ int gap_frequency = 4500 + random ().nextInt (2048 );
982+ int numDocs = 10000 + random ().nextInt (10000 );
983+ int numHosts = numDocs / 20 ;
984+
985+ for (int i = 0 ; i < numDocs ; i ++) {
986+ var d = new Document ();
987+
988+ int batchIndex = i / numHosts ;
989+ String hostName = String .format (Locale .ROOT , "host-%03d" , batchIndex );
990+ long timestamp = baseTimestamp + (1000L * i );
991+
992+ d .add (new SortedDocValuesField (hostnameField , new BytesRef (hostName )));
993+ // Index sorting doesn't work with NumericDocValuesField:
994+ d .add (new SortedNumericDocValuesField (timestampField , timestamp ));
995+ d .add (new NumericDocValuesField ("counter" , counter1 ++));
996+ if (i % gap_frequency != 0 ) {
997+ d .add (new NumericDocValuesField ("sparse_counter" , counter1 ));
998+ }
999+
1000+ int numGauge2 = 1 + random ().nextInt (8 );
1001+ for (int j = 0 ; j < numGauge2 ; j ++) {
1002+ d .add (new SortedNumericDocValuesField ("gauge" , gauge2Values [(i + j ) % gauge2Values .length ]));
1003+ if (i % gap_frequency != 0 ) {
1004+ d .add (new SortedNumericDocValuesField ("sparse_gauge" , gauge2Values [(i + j ) % gauge2Values .length ]));
1005+ }
1006+ }
1007+
1008+ d .add (new SortedDocValuesField ("tag" , new BytesRef (randomFrom (tags ))));
1009+ if (i % gap_frequency != 0 ) {
1010+ d .add (new SortedDocValuesField ("sparse_tag" , new BytesRef (randomFrom (tags ))));
1011+ }
1012+
1013+ int numTags = 1 + random ().nextInt (8 );
1014+ for (int j = 0 ; j < numTags ; j ++) {
1015+ d .add (new SortedSetDocValuesField ("tags" , new BytesRef (tags [(i + j ) % tags .length ])));
1016+ if (i % gap_frequency != 0 ) {
1017+ d .add (new SortedSetDocValuesField ("sparse_tags" , new BytesRef (tags [(i + j ) % tags .length ])));
1018+ }
1019+ }
1020+
1021+ d .add (new BinaryDocValuesField ("tags_as_bytes" , new BytesRef (tags [i % tags .length ])));
1022+ if (i % gap_frequency != 0 ) {
1023+ d .add (new BinaryDocValuesField ("sparse_tags_as_bytes" , new BytesRef (tags [i % tags .length ])));
1024+ }
1025+
1026+ iw .addDocument (d );
1027+ if (i % 100 == 0 ) {
1028+ iw .commit ();
1029+ }
1030+ }
1031+ iw .commit ();
1032+
1033+ iw .forceMerge (1 );
1034+
1035+ try (var reader = DirectoryReader .open (iw )) {
1036+ assertEquals (1 , reader .leaves ().size ());
1037+ assertEquals (numDocs , reader .maxDoc ());
1038+ var leaf = reader .leaves ().get (0 ).reader ();
1039+ var hostNameDV = leaf .getSortedDocValues (hostnameField );
1040+ assertNotNull (hostNameDV );
1041+ validateRunEnd (hostNameDV );
1042+ var timestampDV = DocValues .unwrapSingleton (leaf .getSortedNumericDocValues (timestampField ));
1043+ assertNotNull (timestampDV );
1044+ validateRunEnd (timestampDV );
1045+ var counterOneDV = leaf .getNumericDocValues ("counter" );
1046+ assertNotNull (counterOneDV );
1047+ validateRunEnd (counterOneDV );
1048+ var sparseCounter = leaf .getNumericDocValues ("sparse_counter" );
1049+ assertNotNull (sparseCounter );
1050+ validateRunEnd (sparseCounter );
1051+ var gaugeOneDV = leaf .getSortedNumericDocValues ("gauge" );
1052+ assertNotNull (gaugeOneDV );
1053+ validateRunEnd (gaugeOneDV );
1054+ var sparseGaugeDV = leaf .getSortedNumericDocValues ("sparse_gauge" );
1055+ assertNotNull (sparseGaugeDV );
1056+ validateRunEnd (sparseGaugeDV );
1057+ var tagDV = leaf .getSortedDocValues ("tag" );
1058+ assertNotNull (tagDV );
1059+ validateRunEnd (tagDV );
1060+ var sparseTagDV = leaf .getSortedDocValues ("sparse_tag" );
1061+ assertNotNull (sparseTagDV );
1062+ validateRunEnd (sparseTagDV );
1063+ var tagsDV = leaf .getSortedSetDocValues ("tags" );
1064+ assertNotNull (tagsDV );
1065+ validateRunEnd (tagsDV );
1066+ var sparseTagsDV = leaf .getSortedSetDocValues ("sparse_tags" );
1067+ assertNotNull (sparseTagsDV );
1068+ validateRunEnd (sparseTagsDV );
1069+ var tagBytesDV = leaf .getBinaryDocValues ("tags_as_bytes" );
1070+ assertNotNull (tagBytesDV );
1071+ validateRunEnd (tagBytesDV );
1072+ var sparseTagBytesDV = leaf .getBinaryDocValues ("sparse_tags_as_bytes" );
1073+ assertNotNull (sparseTagBytesDV );
1074+ validateRunEnd (sparseTagBytesDV );
1075+ }
1076+ }
1077+ }
1078+
1079+ private void validateRunEnd (DocIdSetIterator iterator ) throws IOException {
1080+ int runCount = 0 ;
1081+ while (iterator .nextDoc () != DocIdSetIterator .NO_MORE_DOCS ) {
1082+ int runLength = iterator .docIDRunEnd () - iterator .docID () - 1 ;
1083+ if (runLength > 1 ) {
1084+ runCount ++;
1085+ for (int i = 0 ; i < runLength ; i ++) {
1086+ int expected = iterator .docID () + 1 ;
1087+ assertEquals (expected , iterator .advance (expected ));
1088+ }
1089+ }
1090+ }
1091+ assertTrue ("Expected docid runs of greater than 1" , runCount > 0 );
1092+ }
1093+
9621094 private IndexWriterConfig getTimeSeriesIndexWriterConfig (String hostnameField , String timestampField ) {
9631095 var config = new IndexWriterConfig ();
9641096 if (hostnameField != null ) {
0 commit comments