Skip to content

Commit 2314d5d

Browse files
authored
Merge branch 'main' into feature/esql-group-by-all-dimensions
2 parents 47856b3 + e6c5dcc commit 2314d5d

File tree

40 files changed

+2049
-247
lines changed

40 files changed

+2049
-247
lines changed

docs/changelog/138029.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 138029
2+
summary: Fuse MV_MIN and MV_MAX and document process
3+
area: ES|QL
4+
type: feature
5+
issues: []

docs/changelog/138524.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
pr: 138524
2+
summary: Remove feature flag to enable binary doc value compression
3+
area: Mapping
4+
type: feature
5+
issues: []
6+
highlight:
7+
title: Remove feature flag to enable binary doc value compression
8+
body: |-
9+
Add compression for binary doc values using Zstd and blocks with a variable number of values.
10+
11+
Block-wise LZ4 compression was previously added to Lucene in LUCENE-9211 and removed in LUCENE-9378 due to query performance issues. This approach stored a constant number of values per block (specifically 32 values). This made it easy to map a given value index (e.g., docId) to the block containing it by doing blockId = docId / 32.
12+
Unfortunately, if values are very large, we must still have exactly 32 values per block, and (de)compressing a block could cause very high memory usage. As a result, we had to keep the number of values small, meaning that in the average case, a block was much smaller than ideal.
13+
To overcome the issues of blocks with a constant number of values, this PR adds block-wise compression with a variable number of values per block. It stores a minimum of 1 document per block and stops adding values when the size of a block exceeds a threshold or the number of values exceeds a threshold.
14+
Like the previous version, it stores an array of addresses for the start of each block. Additionally, it stores a parallel array with the docId at the start of each block. When looking up a given docId, if it is not in the current block, we binary search the array of docId starts to find the blockId containing the value. We then look up the address of the block. After this, decompression works very similarly to the code from LUCENE-9211; the main difference being that Zstd(1) is used instead of LZ4.
15+
16+
notable: true

docs/changelog/138541.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 138541
2+
summary: Simple bulk loading of compressed binary doc values
3+
area: Codec
4+
type: enhancement
5+
issues: []

docs/changelog/138589.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 138589
2+
summary: Upgrading commons-lang3 version for repository-hdfs plugin
3+
area: Snapshot/Restore
4+
type: upgrade
5+
issues: []

gradle/verification-metadata.xml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2287,11 +2287,6 @@
22872287
<sha256 value="ef368c9fa003963da78399b8f5a41ddfbef6b206f505f52293005730d87e7295" origin="Generated by Gradle"/>
22882288
</artifact>
22892289
</component>
2290-
<component group="org.apache.commons" name="commons-lang3" version="3.11">
2291-
<artifact name="commons-lang3-3.11.jar">
2292-
<sha256 value="4ee380259c068d1dbe9e84ab52186f2acd65de067ec09beff731fca1697fdb16" origin="Generated by Gradle"/>
2293-
</artifact>
2294-
</component>
22952290
<component group="org.apache.commons" name="commons-lang3" version="3.14.0">
22962291
<artifact name="commons-lang3-3.14.0.jar">
22972292
<sha256 value="7b96bf3ee68949abb5bc465559ac270e0551596fa34523fddf890ec418dde13c" origin="Generated by Gradle"/>
@@ -2302,6 +2297,11 @@
23022297
<sha256 value="4eeeae8d20c078abb64b015ec158add383ac581571cddc45c68f0c9ae0230720" origin="Generated by Gradle"/>
23032298
</artifact>
23042299
</component>
2300+
<component group="org.apache.commons" name="commons-lang3" version="3.20.0">
2301+
<artifact name="commons-lang3-3.20.0.jar">
2302+
<sha256 value="69e5c9fa35da7a51a5fd2099dfe56a2d8d32cf233e2f6d770e796146440263f4" origin="Generated by Gradle"/>
2303+
</artifact>
2304+
</component>
23052305
<component group="org.apache.commons" name="commons-lang3" version="3.7">
23062306
<artifact name="commons-lang3-3.7.jar">
23072307
<sha256 value="6e8dc31e046508d9953c96534edf0c2e0bfe6f468966b5b842b3f87e43b6a847" origin="Generated by Gradle"/>

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 82 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
import org.apache.lucene.document.Field;
1515
import org.apache.lucene.document.FieldType;
1616
import org.apache.lucene.document.StoredField;
17+
import org.apache.lucene.index.BinaryDocValues;
18+
import org.apache.lucene.index.DocValues;
1719
import org.apache.lucene.index.IndexOptions;
1820
import org.apache.lucene.index.LeafReaderContext;
1921
import org.apache.lucene.index.Term;
@@ -30,6 +32,7 @@
3032
import org.apache.lucene.util.BytesRef;
3133
import org.apache.lucene.util.IOFunction;
3234
import org.elasticsearch.common.CheckedIntFunction;
35+
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
3336
import org.elasticsearch.common.lucene.Lucene;
3437
import org.elasticsearch.common.text.UTF8DecodingReader;
3538
import org.elasticsearch.common.unit.Fuzziness;
@@ -39,6 +42,7 @@
3942
import org.elasticsearch.index.analysis.NamedAnalyzer;
4043
import org.elasticsearch.index.fielddata.FieldDataContext;
4144
import org.elasticsearch.index.fielddata.IndexFieldData;
45+
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
4246
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
4347
import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData;
4448
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
@@ -297,12 +301,17 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
297301

298302
if (parent instanceof KeywordFieldMapper.KeywordFieldType keywordParent
299303
&& keywordParent.ignoreAbove().valuesPotentiallyIgnored()) {
300-
final String parentFallbackFieldName = keywordParent.syntheticSourceFallbackFieldName();
301304
if (parent.isStored()) {
302-
return storedFieldFetcher(parentFieldName, parentFallbackFieldName);
305+
return combineFieldFetchers(
306+
storedFieldFetcher(parentFieldName),
307+
ignoredValuesDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName())
308+
);
303309
} else if (parent.hasDocValues()) {
304310
var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH);
305-
return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(parentFallbackFieldName));
311+
return combineFieldFetchers(
312+
docValuesFieldFetcher(ifd),
313+
ignoredValuesDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName())
314+
);
306315
}
307316
}
308317

@@ -325,22 +334,16 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
325334
final KeywordFieldMapper.KeywordFieldType keywordDelegate
326335
) {
327336
if (keywordDelegate.ignoreAbove().valuesPotentiallyIgnored()) {
328-
// because we don't know whether the delegate field will be ignored during parsing, we must also check the current field
329-
String fieldName = name();
330-
String fallbackName = syntheticSourceFallbackFieldName();
331-
332-
// delegate field names
333337
String delegateFieldName = keywordDelegate.name();
334-
String delegateFieldFallbackName = keywordDelegate.syntheticSourceFallbackFieldName();
338+
// because we don't know whether the delegate will ignore a value, we must also check the fallback field created by this
339+
// match_only_text field
340+
String fallbackName = syntheticSourceFallbackFieldName();
335341

336342
if (keywordDelegate.isStored()) {
337-
return storedFieldFetcher(delegateFieldName, delegateFieldFallbackName, fieldName, fallbackName);
343+
return storedFieldFetcher(delegateFieldName, fallbackName);
338344
} else if (keywordDelegate.hasDocValues()) {
339345
var ifd = searchExecutionContext.getForField(keywordDelegate, MappedFieldType.FielddataOperation.SEARCH);
340-
return combineFieldFetchers(
341-
docValuesFieldFetcher(ifd),
342-
storedFieldFetcher(delegateFieldFallbackName, fieldName, fallbackName)
343-
);
346+
return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(fallbackName));
344347
}
345348
}
346349

@@ -355,25 +358,34 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
355358
}
356359
}
357360

358-
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher(
359-
IndexFieldData<?> ifd
361+
private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher(IndexFieldData<?> ifd) {
362+
return context -> {
363+
SortedBinaryDocValues indexedValuesDocValues = ifd.load(context).getBytesValues();
364+
return docId -> getValuesFromDocValues(indexedValuesDocValues, docId);
365+
};
366+
}
367+
368+
private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> ignoredValuesDocValuesFieldFetcher(
369+
String fieldName
360370
) {
361371
return context -> {
362-
var sortedBinaryDocValues = ifd.load(context).getBytesValues();
363-
return docId -> {
364-
if (sortedBinaryDocValues.advanceExact(docId)) {
365-
var values = new ArrayList<>(sortedBinaryDocValues.docValueCount());
366-
for (int i = 0; i < sortedBinaryDocValues.docValueCount(); i++) {
367-
values.add(sortedBinaryDocValues.nextValue().utf8ToString());
368-
}
369-
return values;
370-
} else {
371-
return List.of();
372-
}
373-
};
372+
CustomBinaryDocValues ignoredValuesDocValues = new CustomBinaryDocValues(DocValues.getBinary(context.reader(), fieldName));
373+
return docId -> getValuesFromDocValues(ignoredValuesDocValues, docId);
374374
};
375375
}
376376

377+
private List<Object> getValuesFromDocValues(SortedBinaryDocValues docValues, int docId) throws IOException {
378+
if (docValues.advanceExact(docId)) {
379+
var values = new ArrayList<>(docValues.docValueCount());
380+
for (int i = 0; i < docValues.docValueCount(); i++) {
381+
values.add(docValues.nextValue().utf8ToString());
382+
}
383+
return values;
384+
} else {
385+
return List.of();
386+
}
387+
}
388+
377389
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> storedFieldFetcher(String... names) {
378390
var loader = StoredFieldLoader.create(false, Set.of(names));
379391
return context -> {
@@ -779,4 +791,46 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException {
779791

780792
return fieldLoader;
781793
}
794+
795+
/**
796+
* A wrapper around {@link BinaryDocValues} that exposes it through the {@link SortedBinaryDocValues} API. Note: these values are not sorted.
797+
*/
798+
private static class CustomBinaryDocValues extends SortedBinaryDocValues {
799+
800+
private final BinaryDocValues binaryDocValues;
801+
private final ByteArrayStreamInput stream;
802+
803+
private int docValueCount = 0;
804+
805+
CustomBinaryDocValues(BinaryDocValues binaryDocValues) {
806+
this.binaryDocValues = binaryDocValues;
807+
this.stream = new ByteArrayStreamInput();
808+
}
809+
810+
@Override
811+
public BytesRef nextValue() throws IOException {
812+
// readBytesRef() already knows how to decode the underlying byte array, so there is no need to call readVInt() explicitly
813+
return stream.readBytesRef();
814+
}
815+
816+
@Override
817+
public boolean advanceExact(int docId) throws IOException {
818+
// if document has a value, read underlying bytes
819+
if (binaryDocValues.advanceExact(docId)) {
820+
BytesRef docValuesBytes = binaryDocValues.binaryValue();
821+
stream.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length);
822+
docValueCount = stream.readVInt();
823+
return true;
824+
}
825+
826+
// otherwise there is nothing to do
827+
docValueCount = 0;
828+
return false;
829+
}
830+
831+
@Override
832+
public int docValueCount() {
833+
return docValueCount;
834+
}
835+
}
782836
}

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java

Lines changed: 19 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,9 @@
1515
import org.apache.lucene.search.Query;
1616
import org.elasticsearch.common.Explicit;
1717
import org.elasticsearch.common.settings.Setting;
18-
import org.elasticsearch.common.settings.Settings;
1918
import org.elasticsearch.common.xcontent.support.XContentMapValues;
2019
import org.elasticsearch.index.IndexMode;
2120
import org.elasticsearch.index.IndexSettings;
22-
import org.elasticsearch.index.IndexVersion;
2321
import org.elasticsearch.index.IndexVersions;
2422
import org.elasticsearch.index.fielddata.FieldData;
2523
import org.elasticsearch.index.fielddata.FieldDataContext;
@@ -133,46 +131,24 @@ public static class Builder extends FieldMapper.Builder {
133131
*/
134132
private final Parameter<TimeSeriesParams.MetricType> metric;
135133

136-
private final IndexMode indexMode;
137-
private final IndexVersion indexCreatedVersion;
138-
private final SourceKeepMode indexSourceKeepMode;
139-
140-
public Builder(
141-
String name,
142-
Settings settings,
143-
IndexMode indexMode,
144-
IndexVersion indexCreatedVersion,
145-
SourceKeepMode indexSourceKeepMode
146-
) {
147-
this(
148-
name,
149-
IGNORE_MALFORMED_SETTING.get(settings),
150-
COERCE_SETTING.get(settings),
151-
indexMode,
152-
indexCreatedVersion,
153-
indexSourceKeepMode
154-
);
155-
}
134+
private final IndexSettings indexSettings;
156135

157-
public Builder(
158-
String name,
159-
boolean ignoreMalformedByDefault,
160-
boolean coerceByDefault,
161-
IndexMode indexMode,
162-
IndexVersion indexCreatedVersion,
163-
SourceKeepMode indexSourceKeepMode
164-
) {
136+
public Builder(String name, IndexSettings indexSettings) {
165137
super(name);
166138
this.ignoreMalformed = Parameter.explicitBoolParam(
167139
"ignore_malformed",
168140
true,
169141
m -> toType(m).ignoreMalformed,
170-
ignoreMalformedByDefault
142+
IGNORE_MALFORMED_SETTING.get(indexSettings.getSettings())
143+
);
144+
this.coerce = Parameter.explicitBoolParam(
145+
"coerce",
146+
true,
147+
m -> toType(m).coerce,
148+
COERCE_SETTING.get(indexSettings.getSettings())
171149
);
172-
this.coerce = Parameter.explicitBoolParam("coerce", true, m -> toType(m).coerce, coerceByDefault);
173-
this.indexMode = indexMode;
174150
this.indexed = Parameter.indexParam(m -> toType(m).indexed, () -> {
175-
if (indexMode == IndexMode.TIME_SERIES) {
151+
if (indexSettings.getMode() == IndexMode.TIME_SERIES) {
176152
var metricType = getMetric().getValue();
177153
return metricType != TimeSeriesParams.MetricType.COUNTER && metricType != TimeSeriesParams.MetricType.GAUGE;
178154
} else {
@@ -190,8 +166,7 @@ public Builder(
190166
);
191167
}
192168
});
193-
this.indexCreatedVersion = indexCreatedVersion;
194-
this.indexSourceKeepMode = indexSourceKeepMode;
169+
this.indexSettings = indexSettings;
195170
}
196171

197172
Builder scalingFactor(double scalingFactor) {
@@ -229,17 +204,17 @@ public ScaledFloatFieldMapper build(MapperBuilderContext context) {
229204
scalingFactor.getValue(),
230205
nullValue.getValue(),
231206
metric.getValue(),
232-
indexMode,
207+
indexSettings.getMode(),
233208
coerce.getValue().value(),
234209
context.isSourceSynthetic()
235210
);
236211
String offsetsFieldName = getOffsetsFieldName(
237212
context,
238-
indexSourceKeepMode,
213+
indexSettings.sourceKeepMode(),
239214
hasDocValues.getValue(),
240215
stored.getValue(),
241216
this,
242-
indexCreatedVersion,
217+
indexSettings.getIndexVersionCreated(),
243218
IndexVersions.SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_SCALED_FLOAT
244219
);
245220
return new ScaledFloatFieldMapper(
@@ -253,15 +228,7 @@ public ScaledFloatFieldMapper build(MapperBuilderContext context) {
253228
}
254229
}
255230

256-
public static final TypeParser PARSER = new TypeParser(
257-
(n, c) -> new Builder(
258-
n,
259-
c.getSettings(),
260-
c.getIndexSettings().getMode(),
261-
c.indexVersionCreated(),
262-
c.getIndexSettings().sourceKeepMode()
263-
)
264-
);
231+
public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getIndexSettings()));
265232

266233
public static final class ScaledFloatFieldType extends SimpleMappedFieldType {
267234

@@ -595,15 +562,10 @@ public String toString() {
595562
private final Double nullValue;
596563
private final double scalingFactor;
597564
private final boolean isSourceSynthetic;
598-
599-
private final boolean ignoreMalformedByDefault;
600-
private final boolean coerceByDefault;
601565
private final TimeSeriesParams.MetricType metricType;
602-
private final IndexMode indexMode;
603-
604-
private final IndexVersion indexCreatedVersion;
605566
private final String offsetsFieldName;
606-
private final SourceKeepMode indexSourceKeepMode;
567+
568+
private final IndexSettings indexSettings;
607569

608570
private ScaledFloatFieldMapper(
609571
String simpleName,
@@ -622,13 +584,9 @@ private ScaledFloatFieldMapper(
622584
this.nullValue = builder.nullValue.getValue();
623585
this.ignoreMalformed = builder.ignoreMalformed.getValue();
624586
this.coerce = builder.coerce.getValue();
625-
this.ignoreMalformedByDefault = builder.ignoreMalformed.getDefaultValue().value();
626-
this.coerceByDefault = builder.coerce.getDefaultValue().value();
587+
this.indexSettings = builder.indexSettings;
627588
this.metricType = builder.metric.getValue();
628-
this.indexMode = builder.indexMode;
629-
this.indexCreatedVersion = builder.indexCreatedVersion;
630589
this.offsetsFieldName = offsetsFieldName;
631-
this.indexSourceKeepMode = builder.indexSourceKeepMode;
632590
}
633591

634592
boolean coerce() {
@@ -657,9 +615,7 @@ protected String contentType() {
657615

658616
@Override
659617
public FieldMapper.Builder getMergeBuilder() {
660-
return new Builder(leafName(), ignoreMalformedByDefault, coerceByDefault, indexMode, indexCreatedVersion, indexSourceKeepMode)
661-
.metric(metricType)
662-
.init(this);
618+
return new Builder(leafName(), indexSettings).metric(metricType).init(this);
663619
}
664620

665621
@Override

0 commit comments

Comments
 (0)