
Commit 2794b8d

Fix doc writers block writing logic for diskbbq (#134145)
Instead of relying on "max bpv" logic, which can break when delta encoding is involved, this change tracks the statistics used to determine the optimal bpv and picks the largest values across all blocks. Closes #134106
1 parent 3fea686 commit 2794b8d
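
As a reading aid for the description above, here is a minimal, self-contained sketch of the failure mode (the class and record names are hypothetical, not the production code): delta encoding is selected from a block's span (max - min + 1), while the plain bit-packed encodings are selected from the largest absolute doc ID, so taking the numerically largest per-block encoding code does not necessarily yield an encoding that can represent every block. Aggregating the two statistics across blocks and deciding once, as the fixed calculateBlockEncoding does, avoids that.

// Sketch only: BpvSelectionSketch and BlockStats are illustrative names,
// not part of DocIdsWriter. The thresholds mirror the ones visible in the diff;
// the CONTINUOUS_IDS case for gap-free blocks is omitted for brevity.
public class BpvSelectionSketch {

    // Per-block statistics: largest doc ID and span (max - min + 1).
    record BlockStats(int maxValue, int min2max) {}

    static BlockStats stats(int[] block) {
        int min = Integer.MAX_VALUE;
        int max = Integer.MIN_VALUE;
        for (int v : block) {
            min = Math.min(min, v);
            max = Math.max(max, v);
        }
        return new BlockStats(max, max - min + 1);
    }

    public static void main(String[] args) {
        // Block A: large absolute doc IDs but a tiny span; on its own it fits delta-16.
        int[] blockA = { 1_000_000, 1_000_001, 1_000_050 };
        // Block B: span wider than 0xFFFF; delta-16 cannot represent it.
        int[] blockB = { 10, 70_000, 150_000 };

        BlockStats a = stats(blockA);
        BlockStats b = stats(blockB);

        // Aggregate the statistics across blocks, then decide once.
        int maxValue = Math.max(a.maxValue(), b.maxValue());
        int maxMin2Max = Math.max(a.min2max(), b.min2max());

        String choice;
        if (maxMin2Max <= 0xFFFF) {
            choice = "DELTA_BPV_16";   // every block's span fits in 16 bits
        } else if (maxValue <= 0x1FFFFF) {
            choice = "BPV_21";         // largest doc ID fits in 21 bits
        } else if (maxValue <= 0xFFFFFF) {
            choice = "BPV_24";
        } else {
            choice = "BPV_32";
        }
        // Prints BPV_21: block A alone looks delta-friendly, but block B's span
        // rules delta encoding out for the combined choice.
        System.out.println(choice);
    }
}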

File tree

2 files changed: +28 -25 lines


muted-tests.yml

Lines changed: 0 additions & 3 deletions
@@ -525,9 +525,6 @@ tests:
 - class: org.elasticsearch.xpack.esql.ccq.MultiClusterSpecIT
   method: test {csv-spec:spatial.ConvertFromStringParseError}
   issue: https://github.com/elastic/elasticsearch/issues/134104
-- class: org.elasticsearch.index.codec.vectors.DocIdsWriterTests
-  method: testSorted
-  issue: https://github.com/elastic/elasticsearch/issues/134106
 - class: org.elasticsearch.xpack.writeloadforecaster.WriteLoadForecasterIT
   method: testWriteLoadForecastIsOverriddenBySetting
   issue: https://github.com/elastic/elasticsearch/issues/133455

server/src/main/java/org/elasticsearch/index/codec/vectors/DocIdsWriter.java

Lines changed: 28 additions & 22 deletions
@@ -74,20 +74,39 @@ byte calculateBlockEncoding(IntToIntFunction docIds, int count, int blockSize) {
         if (count == 0) {
             return CONTINUOUS_IDS;
         }
-        byte encoding = CONTINUOUS_IDS;
         int iterationLimit = count - blockSize + 1;
         int i = 0;
+        int maxValue = 0;
+        int maxMin2Max = 0;
+        boolean continuousIds = true;
         for (; i < iterationLimit; i += blockSize) {
             int offset = i;
-            encoding = (byte) Math.max(encoding, blockEncoding(d -> docIds.apply(offset + d), blockSize));
+            var r = sortedAndMaxAndMin2Max(d -> docIds.apply(offset + d), blockSize);
+            continuousIds &= r[0] == 1;
+            maxValue = Math.max(maxValue, r[1]);
+            maxMin2Max = Math.max(maxMin2Max, r[2]);
         }
         // check the tail
-        if (i == count) {
-            return encoding;
+        if (i < count) {
+            int offset = i;
+            var r = sortedAndMaxAndMin2Max(d -> docIds.apply(offset + d), count - i);
+            continuousIds &= r[0] == 1;
+            maxValue = Math.max(maxValue, r[1]);
+            maxMin2Max = Math.max(maxMin2Max, r[2]);
+        }
+        if (continuousIds) {
+            return CONTINUOUS_IDS;
+        } else if (maxMin2Max <= 0xFFFF) {
+            return DELTA_BPV_16;
+        } else {
+            if (maxValue <= 0x1FFFFF) {
+                return BPV_21;
+            } else if (maxValue <= 0xFFFFFF) {
+                return BPV_24;
+            } else {
+                return BPV_32;
+            }
         }
-        int offset = i;
-        encoding = (byte) Math.max(encoding, blockEncoding(d -> docIds.apply(offset + d), count - i));
-        return encoding;
     }
 
     void writeDocIds(IntToIntFunction docIds, int count, byte encoding, DataOutput out) throws IOException {
@@ -197,7 +216,7 @@ private void write32(IntToIntFunction docIds, int count, int min, DataOutput out
         }
     }
 
-    private static byte blockEncoding(IntToIntFunction docIds, int count) {
+    private static int[] sortedAndMaxAndMin2Max(IntToIntFunction docIds, int count) {
         // docs can be sorted either when all docs in a block have the same value
         // or when a segment is sorted
         boolean strictlySorted = true;
@@ -214,20 +233,7 @@ private static byte blockEncoding(IntToIntFunction docIds, int count) {
         }
 
         int min2max = max - min + 1;
-        if (strictlySorted && min2max == count) {
-            return CONTINUOUS_IDS;
-        }
-        if (min2max <= 0xFFFF) {
-            return DELTA_BPV_16;
-        } else {
-            if (max <= 0x1FFFFF) {
-                return BPV_21;
-            } else if (max <= 0xFFFFFF) {
-                return BPV_24;
-            } else {
-                return BPV_32;
-            }
-        }
+        return new int[] { (strictlySorted && min2max == count) ? 1 : 0, max, min2max };
     }
 
     void writeDocIds(IntToIntFunction docIds, int count, DataOutput out) throws IOException {
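
For readers skimming the diff, here is a hedged, standalone re-statement of the per-block statistics the renamed helper reports: a flag for "strictly increasing and gap-free", the largest doc ID, and the span max - min + 1. The class and method names below are local to the sketch, and it reads an int[] rather than the IntToIntFunction the real helper uses.

// Standalone sketch; not the DocIdsWriter implementation.
public class BlockStatsSketch {

    static int[] blockStats(int[] docIds) {
        boolean strictlySorted = true;
        int min = docIds[0];
        int max = docIds[0];
        for (int i = 1; i < docIds.length; i++) {
            if (docIds[i] <= docIds[i - 1]) {
                strictlySorted = false;   // repeated or out-of-order doc ID
            }
            min = Math.min(min, docIds[i]);
            max = Math.max(max, docIds[i]);
        }
        int min2max = max - min + 1;
        // A strictly increasing block whose span equals its length has no gaps,
        // i.e. the doc IDs are consecutive and the CONTINUOUS_IDS encoding applies.
        boolean consecutive = strictlySorted && min2max == docIds.length;
        return new int[] { consecutive ? 1 : 0, max, min2max };
    }

    public static void main(String[] args) {
        System.out.println(java.util.Arrays.toString(blockStats(new int[] { 7, 8, 9, 10 })));  // [1, 10, 4]
        System.out.println(java.util.Arrays.toString(blockStats(new int[] { 7, 9, 10, 11 }))); // [0, 11, 5]
    }
}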
