Skip to content

Commit 6425a61

Browse files
authored
Merge branch 'main' into esql_ts_agg_detect
2 parents 7782f8f + a857290 commit 6425a61

File tree

40 files changed

+1345
-700
lines changed

40 files changed

+1345
-700
lines changed

docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,3 +91,37 @@ The `linear` retriever supports the following normalizers:
9191
score = (score - min) / (max - min)
9292
```
9393
* `l2_norm`: Normalizes scores using the L2 norm of the score values {applies_to}`stack: ga 9.1`
94+
95+
## Example
96+
97+
This example of a hybrid search assigns a weight of 5 to the KNN results and a weight of 1.5 to the BM25 results, so KNN scores count roughly 3.3 times as much as BM25 scores in the final ranking.
98+
99+
```console
100+
GET my_index/_search
101+
{
102+
"retriever": {
103+
"linear": {
104+
"retrievers": [
105+
{
106+
"retriever": {
107+
"knn": {
108+
...
109+
}
110+
},
111+
"weight": 5 # KNN query weighted 5x
112+
},
113+
{
114+
"retriever": {
115+
"standard": {
116+
...
117+
}
118+
},
119+
"weight": 1.5 # BM25 query weighted 1.5x
120+
}
121+
]
122+
}
123+
}
124+
}
125+
```
126+
127+
See also [this hybrid search example](retrievers-examples.md#retrievers-examples-linear-retriever).

test/framework/src/main/java/org/elasticsearch/datageneration/matchers/source/SourceTransforms.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import java.util.function.Function;
1919
import java.util.stream.Collectors;
2020

21-
class SourceTransforms {
21+
public class SourceTransforms {
2222
/**
2323
* This preprocessing step makes it easier to match the document using a unified structure.
2424
* It performs the following modifications:
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.datageneration.queries;
11+
12+
import org.elasticsearch.datageneration.FieldType;
13+
import org.elasticsearch.index.query.QueryBuilder;
14+
import org.elasticsearch.index.query.QueryBuilders;
15+
import org.elasticsearch.test.ESTestCase;
16+
17+
import java.util.ArrayList;
18+
import java.util.Arrays;
19+
import java.util.List;
20+
import java.util.Map;
21+
22+
public interface LeafQueryGenerator {
23+
24+
List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value);
25+
26+
/**
27+
* Build a query for a specific type. If the field is nested, this query will need to be wrapped in nested queries.
28+
* @param type the type to build a query for
29+
* @return a generator that can build queries for this type
30+
*/
31+
static LeafQueryGenerator buildForType(String type) {
32+
LeafQueryGenerator noQueries = (Map<String, Object> fieldMapping, String path, Object value) -> List.of();
33+
34+
FieldType fieldType = FieldType.tryParse(type);
35+
if (fieldType == null) {
36+
return noQueries;
37+
}
38+
39+
return switch (fieldType) {
40+
case KEYWORD -> new KeywordQueryGenerator();
41+
case TEXT -> new TextQueryGenerator();
42+
case WILDCARD -> new WildcardQueryGenerator();
43+
default -> noQueries;
44+
};
45+
}
46+
47+
class KeywordQueryGenerator implements LeafQueryGenerator {
48+
public List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value) {
49+
if (fieldMapping != null) {
50+
boolean isIndexed = (Boolean) fieldMapping.getOrDefault("index", true);
51+
boolean hasDocValues = (Boolean) fieldMapping.getOrDefault("doc_values", true);
52+
if (isIndexed == false && hasDocValues == false) {
53+
return List.of();
54+
}
55+
}
56+
return List.of(QueryBuilders.termQuery(path, value));
57+
}
58+
}
59+
60+
class WildcardQueryGenerator implements LeafQueryGenerator {
61+
public List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value) {
62+
// Queries with emojis can currently fail due to https://github.com/elastic/elasticsearch/issues/132144
63+
if (containsHighSurrogates((String) value)) {
64+
return List.of();
65+
}
66+
return List.of(QueryBuilders.termQuery(path, value), QueryBuilders.wildcardQuery(path, value + "*"));
67+
}
68+
}
69+
70+
class TextQueryGenerator implements LeafQueryGenerator {
71+
public List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value) {
72+
if (fieldMapping != null) {
73+
boolean isIndexed = (Boolean) fieldMapping.getOrDefault("index", true);
74+
if (isIndexed == false) {
75+
return List.of();
76+
}
77+
}
78+
79+
var results = new ArrayList<QueryBuilder>();
80+
results.add(QueryBuilders.matchQuery(path, value));
81+
var phraseQuery = buildPhraseQuery(path, (String) value);
82+
if (phraseQuery != null) {
83+
results.add(phraseQuery);
84+
}
85+
return results;
86+
}
87+
88+
private static QueryBuilder buildPhraseQuery(String path, String value) {
89+
var tokens = Arrays.asList(value.split("[^a-zA-Z0-9]"));
90+
if (tokens.isEmpty()) {
91+
return null;
92+
}
93+
94+
int low = ESTestCase.randomIntBetween(0, tokens.size() - 1);
95+
int hi = ESTestCase.randomIntBetween(low + 1, tokens.size());
96+
var phrase = String.join(" ", tokens.subList(low, hi));
97+
return QueryBuilders.matchPhraseQuery(path, phrase);
98+
}
99+
}
100+
101+
static boolean containsHighSurrogates(String s) {
102+
for (int i = 0; i < s.length(); i++) {
103+
if (Character.isHighSurrogate(s.charAt(i))) {
104+
return true;
105+
}
106+
}
107+
return false;
108+
}
109+
}
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.datageneration.queries;
11+
12+
import org.apache.lucene.search.join.ScoreMode;
13+
import org.elasticsearch.datageneration.Mapping;
14+
import org.elasticsearch.index.query.QueryBuilder;
15+
import org.elasticsearch.index.query.QueryBuilders;
16+
17+
import java.util.ArrayList;
18+
import java.util.Arrays;
19+
import java.util.List;
20+
import java.util.Map;
21+
22+
public class QueryGenerator {
23+
24+
private final Mapping mapping;
25+
26+
public QueryGenerator(Mapping mapping) {
27+
this.mapping = mapping;
28+
}
29+
30+
public List<QueryBuilder> generateQueries(String type, String path, Object value) {
31+
// This query generator cannot handle fields with periods in the name.
32+
if (path.equals("host.name")) {
33+
return List.of();
34+
}
35+
// Can handle dynamically mapped fields, but not runtime fields
36+
if (isRuntimeField(path)) {
37+
return List.of();
38+
}
39+
var leafQueryGenerator = LeafQueryGenerator.buildForType(type);
40+
var fieldMapping = mapping.lookup().get(path);
41+
var leafQueries = leafQueryGenerator.generate(fieldMapping, path, value);
42+
return leafQueries.stream().map(q -> wrapInNestedQuery(path, q)).toList();
43+
}
44+
45+
private QueryBuilder wrapInNestedQuery(String path, QueryBuilder leafQuery) {
46+
String[] parts = path.split("\\.");
47+
List<String> nestedPaths = getNestedPathPrefixes(parts);
48+
QueryBuilder query = leafQuery;
49+
for (String nestedPath : nestedPaths.reversed()) {
50+
query = QueryBuilders.nestedQuery(nestedPath, query, ScoreMode.Max);
51+
}
52+
return query;
53+
}
54+
55+
@SuppressWarnings("unchecked")
56+
private List<String> getNestedPathPrefixes(String[] path) {
57+
Map<String, Object> mapping = this.mapping.raw();
58+
mapping = (Map<String, Object>) mapping.get("_doc");
59+
mapping = (Map<String, Object>) mapping.get("properties");
60+
61+
var result = new ArrayList<String>();
62+
for (int i = 0; i < path.length - 1; i++) {
63+
var field = path[i];
64+
mapping = (Map<String, Object>) mapping.get(field);
65+
66+
// dynamic field
67+
if (mapping == null) {
68+
break;
69+
}
70+
71+
boolean nested = "nested".equals(mapping.get("type"));
72+
if (nested) {
73+
result.add(String.join(".", Arrays.copyOfRange(path, 0, i + 1)));
74+
}
75+
mapping = (Map<String, Object>) mapping.get("properties");
76+
}
77+
return result;
78+
}
79+
80+
@SuppressWarnings("unchecked")
81+
private boolean isRuntimeField(String path) {
82+
String[] parts = path.split("\\.");
83+
var topLevelMapping = (Map<String, Object>) mapping.raw().get("_doc");
84+
boolean inRuntimeContext = "runtime".equals(topLevelMapping.get("dynamic"));
85+
for (int i = 0; i < parts.length - 1; i++) {
86+
var pathToHere = String.join(".", Arrays.copyOfRange(parts, 0, i + 1));
87+
Map<String, Object> fieldMapping = mapping.lookup().get(pathToHere);
88+
if (fieldMapping == null) {
89+
break;
90+
}
91+
if (fieldMapping.containsKey("dynamic")) {
92+
// lower down dynamic definitions override higher up behavior
93+
inRuntimeContext = "runtime".equals(fieldMapping.get("dynamic"));
94+
}
95+
}
96+
return inRuntimeContext;
97+
}
98+
}

x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/CannedSourceOperator.java

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -84,14 +84,20 @@ public static List<Page> deepCopyOf(BlockFactory blockFactory, List<Page> pages)
8484
try {
8585
for (Page p : pages) {
8686
Block[] blocks = new Block[p.getBlockCount()];
87-
for (int b = 0; b < blocks.length; b++) {
88-
Block orig = p.getBlock(b);
89-
try (Block.Builder builder = orig.elementType().newBlockBuilder(p.getPositionCount(), blockFactory)) {
90-
builder.copyFrom(orig, 0, p.getPositionCount());
91-
blocks[b] = builder.build();
87+
try {
88+
for (int b = 0; b < blocks.length; b++) {
89+
Block orig = p.getBlock(b);
90+
try (Block.Builder builder = orig.elementType().newBlockBuilder(p.getPositionCount(), blockFactory)) {
91+
builder.copyFrom(orig, 0, p.getPositionCount());
92+
blocks[b] = builder.build();
93+
}
9294
}
95+
out.add(new Page(blocks));
96+
} catch (Exception e) {
97+
// Something went wrong, release the blocks.
98+
Releasables.closeExpectNoException(blocks);
99+
throw e;
93100
}
94-
out.add(new Page(blocks));
95101
}
96102
} finally {
97103
if (pages.size() != out.size()) {

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import org.apache.lucene.sandbox.document.HalfFloatPoint;
1313
import org.apache.lucene.util.BytesRef;
1414
import org.elasticsearch.ExceptionsHelper;
15-
import org.elasticsearch.action.ActionListener;
15+
import org.elasticsearch.client.internal.Client;
1616
import org.elasticsearch.cluster.RemoteException;
1717
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
1818
import org.elasticsearch.cluster.project.ProjectResolver;
@@ -76,7 +76,7 @@
7676
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.NotEquals;
7777
import org.elasticsearch.xpack.esql.index.EsIndex;
7878
import org.elasticsearch.xpack.esql.inference.InferenceResolution;
79-
import org.elasticsearch.xpack.esql.inference.InferenceRunner;
79+
import org.elasticsearch.xpack.esql.inference.InferenceService;
8080
import org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext;
8181
import org.elasticsearch.xpack.esql.parser.QueryParam;
8282
import org.elasticsearch.xpack.esql.plan.logical.Enrich;
@@ -161,8 +161,6 @@
161161
import static org.hamcrest.Matchers.instanceOf;
162162
import static org.junit.Assert.assertNotNull;
163163
import static org.junit.Assert.assertNull;
164-
import static org.mockito.ArgumentMatchers.any;
165-
import static org.mockito.Mockito.doAnswer;
166164
import static org.mockito.Mockito.mock;
167165

168166
public final class EsqlTestUtils {
@@ -422,20 +420,9 @@ public static LogicalOptimizerContext unboundLogicalOptimizerContext() {
422420
mock(ProjectResolver.class),
423421
mock(IndexNameExpressionResolver.class),
424422
null,
425-
mockInferenceRunner()
423+
new InferenceService(mock(Client.class))
426424
);
427425

428-
@SuppressWarnings("unchecked")
429-
private static InferenceRunner mockInferenceRunner() {
430-
InferenceRunner inferenceRunner = mock(InferenceRunner.class);
431-
doAnswer(i -> {
432-
i.getArgument(1, ActionListener.class).onResponse(emptyInferenceResolution());
433-
return null;
434-
}).when(inferenceRunner).resolveInferenceIds(any(), any());
435-
436-
return inferenceRunner;
437-
}
438-
439426
private EsqlTestUtils() {}
440427

441428
public static Configuration configuration(QueryPragmas pragmas, String query) {

0 commit comments

Comments
 (0)