Commit 164fb7d

Fix doc writers block writing logic for diskbbq
1 parent 8097c1a commit 164fb7d

3 files changed: +29 -25 lines changed

muted-tests.yml

Lines changed: 0 additions & 3 deletions
@@ -531,9 +531,6 @@ tests:
 - class: org.elasticsearch.xpack.esql.ccq.MultiClusterSpecIT
   method: test {csv-spec:spatial.ConvertFromStringParseError}
   issue: https://github.com/elastic/elasticsearch/issues/134104
-- class: org.elasticsearch.index.codec.vectors.DocIdsWriterTests
-  method: testSorted
-  issue: https://github.com/elastic/elasticsearch/issues/134106
 - class: org.elasticsearch.xpack.writeloadforecaster.WriteLoadForecasterIT
   method: testWriteLoadForecastIsOverriddenBySetting
   issue: https://github.com/elastic/elasticsearch/issues/133455
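
Note: with the encoding fix below, the mute entry for DocIdsWriterTests.testSorted (tracked in issue 134106) is removed, so that test runs in CI again.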

server/src/main/java/org/elasticsearch/index/codec/vectors/DocIdsWriter.java

Lines changed: 28 additions & 22 deletions
@@ -74,20 +74,39 @@ byte calculateBlockEncoding(IntToIntFunction docIds, int count, int blockSize) {
         if (count == 0) {
             return CONTINUOUS_IDS;
         }
-        byte encoding = CONTINUOUS_IDS;
         int iterationLimit = count - blockSize + 1;
         int i = 0;
+        int maxValue = 0;
+        int maxMin2Max = 0;
+        boolean continuousIds = true;
         for (; i < iterationLimit; i += blockSize) {
             int offset = i;
-            encoding = (byte) Math.max(encoding, blockEncoding(d -> docIds.apply(offset + d), blockSize));
+            var r = sortedAndMaxAndMin2Max(d -> docIds.apply(offset + d), blockSize);
+            continuousIds &= r[0] == 1;
+            maxValue = Math.max(maxValue, r[1]);
+            maxMin2Max = Math.max(maxMin2Max, r[2]);
         }
         // check the tail
-        if (i == count) {
-            return encoding;
+        if (i < count) {
+            int offset = i;
+            var r = sortedAndMaxAndMin2Max(d -> docIds.apply(offset + d), count - i);
+            continuousIds &= r[0] == 1;
+            maxValue = Math.max(maxValue, r[1]);
+            maxMin2Max = Math.max(maxMin2Max, r[2]);
+        }
+        if (continuousIds) {
+            return CONTINUOUS_IDS;
+        } else if (maxMin2Max <= 0xFFFF) {
+            return DELTA_BPV_16;
+        } else {
+            if (maxValue <= 0x1FFFFF) {
+                return BPV_21;
+            } else if (maxValue <= 0xFFFFFF) {
+                return BPV_24;
+            } else {
+                return BPV_32;
+            }
         }
-        int offset = i;
-        encoding = (byte) Math.max(encoding, blockEncoding(d -> docIds.apply(offset + d), count - i));
-        return encoding;
     }

     void writeDocIds(IntToIntFunction docIds, int count, byte encoding, DataOutput out) throws IOException {

@@ -197,7 +216,7 @@ private void write32(IntToIntFunction docIds, int count, int min, DataOutput out
         }
     }

-    private static byte blockEncoding(IntToIntFunction docIds, int count) {
+    private static int[] sortedAndMaxAndMin2Max(IntToIntFunction docIds, int count) {
         // docs can be sorted either when all docs in a block have the same value
         // or when a segment is sorted
         boolean strictlySorted = true;

@@ -214,20 +233,7 @@ private static byte blockEncoding(IntToIntFunction docIds, int count) {
         }

         int min2max = max - min + 1;
-        if (strictlySorted && min2max == count) {
-            return CONTINUOUS_IDS;
-        }
-        if (min2max <= 0xFFFF) {
-            return DELTA_BPV_16;
-        } else {
-            if (max <= 0x1FFFFF) {
-                return BPV_21;
-            } else if (max <= 0xFFFFFF) {
-                return BPV_24;
-            } else {
-                return BPV_32;
-            }
-        }
+        return new int[] { (strictlySorted && min2max == count) ? 1 : 0, max, min2max };
     }

     void writeDocIds(IntToIntFunction docIds, int count, DataOutput out) throws IOException {
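
To make the reassembled change easier to follow, here is a minimal, self-contained sketch of the patched encoding selection. Only the control flow mirrors the hunks above; the constant values, the JDK IntUnaryOperator used in place of the project's IntToIntFunction, and the body of the per-block scan (min/max/strictly-sorted tracking) are assumptions for illustration, not the actual DocIdsWriter implementation.

// Sketch only: encoding constants below are illustrative placeholders, not the real values.
import java.util.function.IntUnaryOperator;

final class BlockEncodingSketch {

    static final byte CONTINUOUS_IDS = (byte) -2; // placeholder value
    static final byte DELTA_BPV_16 = (byte) 16;   // placeholder value
    static final byte BPV_21 = (byte) 21;         // placeholder value
    static final byte BPV_24 = (byte) 24;         // placeholder value
    static final byte BPV_32 = (byte) 32;         // placeholder value

    static byte calculateBlockEncoding(IntUnaryOperator docIds, int count, int blockSize) {
        if (count == 0) {
            return CONTINUOUS_IDS;
        }
        int iterationLimit = count - blockSize + 1;
        int i = 0;
        int maxValue = 0;
        int maxMin2Max = 0;
        boolean continuousIds = true;
        // Aggregate raw statistics over the full blocks.
        for (; i < iterationLimit; i += blockSize) {
            final int offset = i;
            int[] r = sortedAndMaxAndMin2Max(d -> docIds.applyAsInt(offset + d), blockSize);
            continuousIds &= r[0] == 1;
            maxValue = Math.max(maxValue, r[1]);
            maxMin2Max = Math.max(maxMin2Max, r[2]);
        }
        // Fold the tail block (fewer than blockSize ids) into the same statistics.
        if (i < count) {
            final int offset = i;
            int[] r = sortedAndMaxAndMin2Max(d -> docIds.applyAsInt(offset + d), count - i);
            continuousIds &= r[0] == 1;
            maxValue = Math.max(maxValue, r[1]);
            maxMin2Max = Math.max(maxMin2Max, r[2]);
        }
        // Decide once, over the aggregated statistics of every block.
        if (continuousIds) {
            return CONTINUOUS_IDS;
        } else if (maxMin2Max <= 0xFFFF) {
            return DELTA_BPV_16;
        } else if (maxValue <= 0x1FFFFF) {
            return BPV_21;
        } else if (maxValue <= 0xFFFFFF) {
            return BPV_24;
        } else {
            return BPV_32;
        }
    }

    // Returns { 1 if the block is a strictly sorted, gap-free run, else 0; max; max - min + 1 }.
    // The scan body is an assumed reconstruction; the diff above only shows the return statement.
    static int[] sortedAndMaxAndMin2Max(IntUnaryOperator docIds, int count) {
        boolean strictlySorted = true;
        int prev = docIds.applyAsInt(0);
        int min = prev;
        int max = prev;
        for (int j = 1; j < count; j++) {
            int v = docIds.applyAsInt(j);
            strictlySorted &= v > prev;
            min = Math.min(min, v);
            max = Math.max(max, v);
            prev = v;
        }
        int min2max = max - min + 1;
        return new int[] { (strictlySorted && min2max == count) ? 1 : 0, max, min2max };
    }
}

The visible change is that each block now reports raw statistics (whether it is a strictly sorted gap-free run, its maximum value, and its max - min + 1 span), the tail block is folded into those statistics rather than triggering an early return, and a single encoding decision is made over the aggregate, instead of mapping every block to an encoding byte and combining the bytes with Math.max as the old code did.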

server/src/test/java/org/elasticsearch/index/codec/vectors/DocIdsWriterTests.java

Lines changed: 1 addition & 0 deletions
@@ -38,6 +38,7 @@

 import static org.elasticsearch.index.codec.vectors.DocIdsWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE;

+@com.carrotsearch.randomizedtesting.annotations.Repeat(iterations = 100)
 public class DocIdsWriterTests extends LuceneTestCase {

     public void testNoDocs() throws Exception {
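
Note: the @Repeat annotation comes from the randomizedtesting framework used by the Lucene test infrastructure; it re-runs each test in the class for the given number of iterations (by default with varying random seeds), which should make regressions like the one fixed here much more likely to surface in a single CI run.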
