Skip to content

Commit d2f712b

Browse files
authored
Merge branch 'main' into cohere_default_similarity
2 parents 207e0ae + 9dfe7ab commit d2f712b

File tree

45 files changed

+1220
-692
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+1220
-692
lines changed

docs/changelog/119503.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 119503
2+
summary: Support indices created in ES v6 and updated in ES v7 using different Lucene codecs as archive indices in the current version.
3+
area: Search
4+
type: bug
5+
issues:
6+
- 117042

docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/match.md

Lines changed: 10 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/kibana/definition/functions/match.json

Lines changed: 29 additions & 29 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/privilege/IndexPrivilege.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.elasticsearch.action.support.IndexComponentSelector;
3535
import org.elasticsearch.cluster.metadata.DataStream;
3636
import org.elasticsearch.common.Strings;
37+
import org.elasticsearch.common.util.set.Sets;
3738
import org.elasticsearch.core.Nullable;
3839
import org.elasticsearch.index.seqno.RetentionLeaseActions;
3940
import org.elasticsearch.xpack.core.ccr.action.ForgetFollowerAction;
@@ -452,8 +453,8 @@ private static IndexPrivilege union(
452453
Collection<String> actions,
453454
IndexComponentSelectorPredicate selectorPredicate
454455
) {
455-
final Set<Automaton> automata = HashSet.newHashSet(privileges.size() + actions.size());
456-
final Set<String> names = HashSet.newHashSet(privileges.size() + actions.size());
456+
final Set<Automaton> automata = Sets.newHashSetWithExpectedSize(privileges.size() + actions.size());
457+
final Set<String> names = Sets.newHashSetWithExpectedSize(privileges.size() + actions.size());
457458
for (IndexPrivilege privilege : privileges) {
458459
names.addAll(privilege.name());
459460
automata.add(privilege.automaton);

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Match.java

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -180,13 +180,14 @@ public Match(
180180
name = "analyzer",
181181
type = "keyword",
182182
valueHint = { "standard" },
183-
description = "Analyzer used to convert the text in the query value into token."
183+
description = "Analyzer used to convert the text in the query value into token. Defaults to the index-time analyzer"
184+
+ " mapped for the field. If no analyzer is mapped, the index’s default analyzer is used."
184185
),
185186
@MapParam.MapParamEntry(
186187
name = "auto_generate_synonyms_phrase_query",
187188
type = "boolean",
188189
valueHint = { "true", "false" },
189-
description = "If true, match phrase queries are automatically created for multi-term synonyms."
190+
description = "If true, match phrase queries are automatically created for multi-term synonyms. Defaults to true."
190191
),
191192
@MapParam.MapParamEntry(
192193
name = "fuzziness",
@@ -198,13 +199,14 @@ public Match(
198199
name = "boost",
199200
type = "float",
200201
valueHint = { "2.5" },
201-
description = "Floating point number used to decrease or increase the relevance scores of the query."
202+
description = "Floating point number used to decrease or increase the relevance scores of the query. Defaults to 1.0."
202203
),
203204
@MapParam.MapParamEntry(
204205
name = "fuzzy_transpositions",
205206
type = "boolean",
206207
valueHint = { "true", "false" },
207-
description = "If true, edits for fuzzy matching include transpositions of two adjacent characters (ab → ba)."
208+
description = "If true, edits for fuzzy matching include transpositions of two adjacent characters (ab → ba). "
209+
+ "Defaults to true."
208210
),
209211
@MapParam.MapParamEntry(
210212
name = "fuzzy_rewrite",
@@ -216,19 +218,22 @@ public Match(
216218
"top_terms_blended_freqs_N",
217219
"top_terms_boost_N",
218220
"top_terms_N" },
219-
description = "Method used to rewrite the query. See the rewrite parameter for valid values and more information."
221+
description = "Method used to rewrite the query. See the rewrite parameter for valid values and more information. "
222+
+ "If the fuzziness parameter is not 0, the match query uses a fuzzy_rewrite method of "
223+
+ "top_terms_blended_freqs_${max_expansions} by default."
220224
),
221225
@MapParam.MapParamEntry(
222226
name = "lenient",
223227
type = "boolean",
224228
valueHint = { "true", "false" },
225-
description = "If false, format-based errors, such as providing a text query value for a numeric field, are returned."
229+
description = "If false, format-based errors, such as providing a text query value for a numeric field, are returned. "
230+
+ "Defaults to false."
226231
),
227232
@MapParam.MapParamEntry(
228233
name = "max_expansions",
229234
type = "integer",
230235
valueHint = { "50" },
231-
description = "Maximum number of terms to which the query will expand."
236+
description = "Maximum number of terms to which the query will expand. Defaults to 50."
232237
),
233238
@MapParam.MapParamEntry(
234239
name = "minimum_should_match",
@@ -240,19 +245,20 @@ public Match(
240245
name = "operator",
241246
type = "keyword",
242247
valueHint = { "AND", "OR" },
243-
description = "Boolean logic used to interpret text in the query value."
248+
description = "Boolean logic used to interpret text in the query value. Defaults to OR."
244249
),
245250
@MapParam.MapParamEntry(
246251
name = "prefix_length",
247252
type = "integer",
248253
valueHint = { "1" },
249-
description = "Number of beginning characters left unchanged for fuzzy matching."
254+
description = "Number of beginning characters left unchanged for fuzzy matching. Defaults to 0."
250255
),
251256
@MapParam.MapParamEntry(
252257
name = "zero_terms_query",
253258
type = "keyword",
254259
valueHint = { "none", "all" },
255-
description = "Number of beginning characters left unchanged for fuzzy matching."
260+
description = "Indicates whether all documents or none are returned if the analyzer removes all tokens, such as "
261+
+ "when using a stop filter. Defaults to none."
256262
) },
257263
description = "(Optional) Match additional options as <<esql-function-named-params,function named parameters>>."
258264
+ " See <<query-dsl-match-query,match query>> for more information.",

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java

Lines changed: 131 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.apache.lucene.codecs.PostingsFormat;
1818
import org.apache.lucene.codecs.SegmentInfoFormat;
1919
import org.apache.lucene.codecs.TermVectorsFormat;
20+
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
2021
import org.apache.lucene.index.FieldInfo;
2122
import org.apache.lucene.index.FieldInfos;
2223
import org.apache.lucene.index.Fields;
@@ -26,6 +27,13 @@
2627
import org.apache.lucene.index.Terms;
2728
import org.apache.lucene.store.Directory;
2829
import org.apache.lucene.store.IOContext;
30+
import org.apache.lucene.util.Version;
31+
import org.elasticsearch.core.UpdateForV10;
32+
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.BWCLucene70Codec;
33+
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene80.BWCLucene80Codec;
34+
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene84.BWCLucene84Codec;
35+
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene86.BWCLucene86Codec;
36+
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene87.BWCLucene87Codec;
2937

3038
import java.io.IOException;
3139
import java.util.ArrayList;
@@ -37,55 +45,122 @@
3745
*/
3846
public abstract class BWCCodec extends Codec {
3947

48+
private final FieldInfosFormat fieldInfosFormat;
49+
private final SegmentInfoFormat segmentInfosFormat;
50+
private final PostingsFormat postingsFormat;
51+
4052
protected BWCCodec(String name) {
4153
super(name);
42-
}
4354

44-
@Override
45-
public NormsFormat normsFormat() {
46-
throw new UnsupportedOperationException();
47-
}
55+
this.fieldInfosFormat = new FieldInfosFormat() {
56+
final FieldInfosFormat wrappedFormat = originalFieldInfosFormat();
4857

49-
@Override
50-
public TermVectorsFormat termVectorsFormat() {
51-
throw new UnsupportedOperationException();
52-
}
58+
@Override
59+
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext)
60+
throws IOException {
61+
return filterFields(wrappedFormat.read(directory, segmentInfo, segmentSuffix, iocontext));
62+
}
5363

54-
@Override
55-
public KnnVectorsFormat knnVectorsFormat() {
56-
throw new UnsupportedOperationException();
57-
}
64+
@Override
65+
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context)
66+
throws IOException {
67+
wrappedFormat.write(directory, segmentInfo, segmentSuffix, infos, context);
68+
}
69+
};
70+
71+
this.segmentInfosFormat = new SegmentInfoFormat() {
72+
final SegmentInfoFormat wrappedFormat = originalSegmentInfoFormat();
5873

59-
protected static SegmentInfoFormat wrap(SegmentInfoFormat wrapped) {
60-
return new SegmentInfoFormat() {
6174
@Override
6275
public SegmentInfo read(Directory directory, String segmentName, byte[] segmentID, IOContext context) throws IOException {
63-
return wrap(wrapped.read(directory, segmentName, segmentID, context));
76+
return wrap(wrappedFormat.read(directory, segmentName, segmentID, context));
6477
}
6578

6679
@Override
6780
public void write(Directory dir, SegmentInfo info, IOContext ioContext) throws IOException {
68-
wrapped.write(dir, info, ioContext);
81+
wrappedFormat.write(dir, info, ioContext);
6982
}
7083
};
71-
}
7284

73-
protected static FieldInfosFormat wrap(FieldInfosFormat wrapped) {
74-
return new FieldInfosFormat() {
85+
this.postingsFormat = new PerFieldPostingsFormat() {
7586
@Override
76-
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext)
77-
throws IOException {
78-
return filterFields(wrapped.read(directory, segmentInfo, segmentSuffix, iocontext));
79-
}
80-
81-
@Override
82-
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context)
83-
throws IOException {
84-
wrapped.write(directory, segmentInfo, segmentSuffix, infos, context);
87+
public PostingsFormat getPostingsFormatForField(String field) {
88+
throw new UnsupportedOperationException("Old codecs can't be used for writing");
8589
}
8690
};
8791
}
8892

93+
@Override
94+
public final FieldInfosFormat fieldInfosFormat() {
95+
return fieldInfosFormat;
96+
}
97+
98+
@Override
99+
public final SegmentInfoFormat segmentInfoFormat() {
100+
return segmentInfosFormat;
101+
}
102+
103+
@Override
104+
public PostingsFormat postingsFormat() {
105+
return postingsFormat;
106+
}
107+
108+
/**
109+
* This method is not supported for archive indices and older codecs and will always throw an {@link UnsupportedOperationException}.
110+
* This method is never called in practice, as we rewrite field infos to override the info about which features are present in
111+
* the index. Even if norms are present, field info lies about it.
112+
*
113+
* @return nothing, as this method always throws an exception
114+
* @throws UnsupportedOperationException always thrown to indicate that this method is not supported
115+
*/
116+
@Override
117+
public final NormsFormat normsFormat() {
118+
throw new UnsupportedOperationException();
119+
}
120+
121+
/**
122+
* This method is not supported for archive indices and older codecs and will always throw an {@link UnsupportedOperationException}.
123+
* This method is never called in practice, as we rewrite field infos to override the info about which features are present in
124+
* the index. Even if term vectors are present, field info lies about it.
125+
*
126+
* @return nothing, as this method always throws an exception
127+
* @throws UnsupportedOperationException always thrown to indicate that this method is not supported
128+
*/
129+
@Override
130+
public final TermVectorsFormat termVectorsFormat() {
131+
throw new UnsupportedOperationException();
132+
}
133+
134+
/**
135+
* This method is not supported for archive indices and older codecs and will always throw an {@link UnsupportedOperationException}.
136+
* KNN vectors can't be present because they are not supported in any of the Lucene versions that we support for archive indices.
137+
*
138+
* @return nothing, as this method always throws an exception
139+
* @throws UnsupportedOperationException always thrown to indicate that this method is not supported
140+
*/
141+
@Override
142+
public final KnnVectorsFormat knnVectorsFormat() {
143+
throw new UnsupportedOperationException();
144+
}
145+
146+
/**
147+
* Returns the original {@link SegmentInfoFormat} used by this codec.
148+
* This method should be implemented by subclasses to provide the specific
149+
* {@link SegmentInfoFormat} that this codec is intended to use.
150+
*
151+
* @return the original {@link SegmentInfoFormat} used by this codec
152+
*/
153+
protected abstract SegmentInfoFormat originalSegmentInfoFormat();
154+
155+
/**
156+
* Returns the original {@link FieldInfosFormat} used by this codec.
157+
* This method should be implemented by subclasses to provide the specific
158+
* {@link FieldInfosFormat} that this codec is intended to use.
159+
*
160+
* @return the original {@link FieldInfosFormat} used by this codec
161+
*/
162+
protected abstract FieldInfosFormat originalFieldInfosFormat();
163+
89164
// mark all fields as no term vectors, no norms, no payloads, and no vectors.
90165
private static FieldInfos filterFields(FieldInfos fieldInfos) {
91166
List<FieldInfo> fieldInfoCopy = new ArrayList<>(fieldInfos.size());
@@ -118,13 +193,14 @@ private static FieldInfos filterFields(FieldInfos fieldInfos) {
118193
}
119194

120195
public static SegmentInfo wrap(SegmentInfo segmentInfo) {
121-
final Codec codec = segmentInfo.getCodec();
196+
Codec codec = getBackwardCompatibleCodec(segmentInfo.getCodec());
197+
122198
final SegmentInfo segmentInfo1 = new SegmentInfo(
123199
segmentInfo.dir,
124200
// Use Version.LATEST instead of original version, otherwise SegmentCommitInfo will bark when processing (N-1 limitation)
125201
// TODO: perhaps store the original version information in attributes so that we can retrieve it later when needed?
126-
org.apache.lucene.util.Version.LATEST,
127-
org.apache.lucene.util.Version.LATEST,
202+
Version.LATEST,
203+
Version.LATEST,
128204
segmentInfo.name,
129205
segmentInfo.maxDoc(),
130206
segmentInfo.getUseCompoundFile(),
@@ -139,6 +215,29 @@ public static SegmentInfo wrap(SegmentInfo segmentInfo) {
139215
return segmentInfo1;
140216
}
141217

218+
/**
219+
* Returns a backward-compatible codec for the given codec. If the codec is one of the known Lucene 8.x codecs,
220+
* it returns a corresponding read-only backward-compatible codec. Otherwise, it returns the original codec.
221+
* Lucene 8.x codecs are still shipped with the current version of Lucene.
222+
* Codecs earlier than that are ones we provide directly; they are also read-only and backward-compatible, but they don't require the renaming.
223+
*
224+
* This switch is only for indices created in ES 6.x and later written to in ES 7.x (Lucene 8.x). Indices created
225+
* in ES 7.x can be read directly by ES if marked read-only, without going through archive indices.
226+
*/
227+
@UpdateForV10(owner = UpdateForV10.Owner.SEARCH_FOUNDATIONS)
228+
private static Codec getBackwardCompatibleCodec(Codec codec) {
229+
if (codec == null) return null;
230+
231+
return switch (codec.getClass().getSimpleName()) {
232+
case "Lucene70Codec" -> new BWCLucene70Codec();
233+
case "Lucene80Codec" -> new BWCLucene80Codec();
234+
case "Lucene84Codec" -> new BWCLucene84Codec();
235+
case "Lucene86Codec" -> new BWCLucene86Codec();
236+
case "Lucene87Codec" -> new BWCLucene87Codec();
237+
default -> codec;
238+
};
239+
}
240+
142241
/**
143242
* In-memory postings format that shows no postings available.
144243
*/

0 commit comments

Comments
 (0)