Skip to content

Commit e3326e2

Browse files
committed
Code changes made
1 parent 3b87c15 commit e3326e2

File tree

4 files changed

+195
-30
lines changed

4 files changed

+195
-30
lines changed

docs/reference/elasticsearch/rest-apis/retrievers.md

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -263,8 +263,21 @@ A retriever that normalizes and linearly combines the scores of other retrievers
263263

264264
A list of sub-retriever configurations whose result sets are taken into account and merged through a weighted sum. Each configuration can specify a different weight and normalization for its retriever.
265265

266+
`normalizer`
267+
: (Optional, String)
268+
269+
Specifies a normalizer to be applied to all sub-retrievers. This provides a simple way to configure normalization for all retrievers at once.
270+
271+
The `normalizer` can be specified at the top level, at the per-retriever level, or both, with the following rules:
272+
273+
* If only the top-level `normalizer` is specified, it applies to all sub-retrievers.
274+
* If both a top-level and a per-retriever `normalizer` are specified, the per-retriever normalizer must be identical to the top-level one. If they differ, the request will fail.
275+
* If only per-retriever normalizers are specified, they can be different for each sub-retriever.
276+
* If no normalizer is specified at any level, no normalization is applied.
266277

267-
Each entry specifies the following parameters:
278+
Available values are: `minmax`, `l2_norm`, and `none`. Defaults to `none`.
279+
280+
Each entry in the `retrievers` array specifies the following parameters:
268281

269282
`retriever`
270283
: (Required, a `retriever` object)
@@ -279,19 +292,13 @@ Each entry specifies the following parameters:
279292
`normalizer`
280293
: (Optional, String)
281294

282-
- Specifies how we will normalize the retriever’s scores, before applying the specified `weight`. Available values are: `minmax`, `l2_norm`, and `none`. Defaults to `none`.
295+
Specifies how this specific retriever’s scores are normalized before the specified `weight` is applied. If a top-level `normalizer` is also specified, this normalizer must match it; otherwise the request fails. Available values are: `minmax`, `l2_norm`, and `none`. Defaults to `none`.
283296

284297
* `none`
285298
* `minmax` : A `MinMaxScoreNormalizer` that normalizes scores based on the following formula:
286-
287-
```
288-
score = (score - min) / (max - min)
289-
```
290-
299+
`score = (score - min) / (max - min)`
291300
* `l2_norm` : An `L2ScoreNormalizer` that normalizes scores using the L2 norm of the score values.
292301

293-
See also [this hybrid search example](docs-content://solutions/search/retrievers-examples.md#retrievers-examples-linear-retriever) using a linear retriever on how to independently configure and apply normalizers to retrievers.
294-
295302
`rank_window_size`
296303
: (Optional, integer)
297304

x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java

Lines changed: 60 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -83,14 +83,12 @@ public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder<Linea
8383
List<LinearRetrieverComponent> retrieverComponents = args[0] == null ? List.of() : (List<LinearRetrieverComponent>) args[0];
8484
List<String> fields = (List<String>) args[1];
8585
String query = (String) args[2];
86-
String normalizerName = (String) args[3];
87-
ScoreNormalizer normalizer = normalizerName == null ? null : ScoreNormalizer.valueOf(normalizerName);
86+
ScoreNormalizer normalizer = args[3] == null ? null : ScoreNormalizer.valueOf((String) args[3]);
8887
int rankWindowSize = args[4] == null ? RankBuilder.DEFAULT_RANK_WINDOW_SIZE : (int) args[4];
8988

9089
int index = 0;
9190
float[] weights = new float[retrieverComponents.size()];
9291
ScoreNormalizer[] normalizers = new ScoreNormalizer[retrieverComponents.size()];
93-
Arrays.fill(normalizers, normalizer);
9492
List<RetrieverSource> innerRetrievers = new ArrayList<>();
9593
for (LinearRetrieverComponent component : retrieverComponents) {
9694
innerRetrievers.add(RetrieverSource.from(component.retriever));
@@ -121,8 +119,7 @@ private static float[] getDefaultWeight(List<RetrieverSource> innerRetrievers) {
121119
private static ScoreNormalizer[] getDefaultNormalizers(List<RetrieverSource> innerRetrievers) {
122120
int size = innerRetrievers != null ? innerRetrievers.size() : 0;
123121
ScoreNormalizer[] normalizers = new ScoreNormalizer[size];
124-
Arrays.fill(normalizers, IdentityScoreNormalizer.INSTANCE);
125-
return normalizers;
122+
return new ScoreNormalizer[size];
126123
}
127124

128125
public static LinearRetrieverBuilder fromXContent(XContentParser parser, RetrieverParserContext context) throws IOException {
@@ -160,17 +157,44 @@ public LinearRetrieverBuilder(
160157
// Use a mutable list for innerRetrievers so that we can use addChild
161158
super(innerRetrievers == null ? new ArrayList<>() : new ArrayList<>(innerRetrievers), rankWindowSize);
162159
if (weights.length != this.innerRetrievers.size()) {
163-
throw new IllegalArgumentException("The number of weights must match the number of inner retrievers");
160+
throw new IllegalArgumentException(
161+
"["
162+
+ NAME
163+
+ "] the number of weights must be equal to the number of retrievers, but found ["
164+
+ weights.length
165+
+ "] weights and ["
166+
+ this.innerRetrievers.size()
167+
+ "] retrievers"
168+
);
164169
}
165170
if (normalizers.length != this.innerRetrievers.size()) {
166-
throw new IllegalArgumentException("The number of normalizers must match the number of inner retrievers");
171+
throw new IllegalArgumentException(
172+
"["
173+
+ NAME
174+
+ "] the number of normalizers must be equal to the number of retrievers, but found ["
175+
+ normalizers.length
176+
+ "] normalizers and ["
177+
+ this.innerRetrievers.size()
178+
+ "] retrievers"
179+
);
167180
}
168-
169-
this.fields = fields == null ? null : List.copyOf(fields);
170-
this.query = query;
171-
this.normalizer = normalizer;
172181
this.weights = weights;
173182
this.normalizers = normalizers;
183+
this.fields = fields;
184+
this.query = query;
185+
this.normalizer = normalizer;
186+
187+
if (normalizer != null) {
188+
for (ScoreNormalizer subNormalizer : normalizers) {
189+
if (subNormalizer != null && subNormalizer.equals(DEFAULT_NORMALIZER) == false && subNormalizer.equals(normalizer) == false) {
190+
throw new IllegalArgumentException(
191+
"top-level normalizer ["
192+
+ normalizer.getName()
193+
+ "] is specified and it should be the same as all sub-retriever normalizers"
194+
);
195+
}
196+
}
197+
}
174198
}
175199

176200
public LinearRetrieverBuilder(
@@ -228,8 +252,9 @@ public ActionRequestValidationException validate(
228252

229253
if (normalizer != null) {
230254
for (ScoreNormalizer perRetrieverNormalizer : normalizers) {
231-
boolean isExplicitSubNormalizer = perRetrieverNormalizer != null && !perRetrieverNormalizer.equals(DEFAULT_NORMALIZER);
232-
boolean isMismatch = isExplicitSubNormalizer && !perRetrieverNormalizer.equals(normalizer);
255+
boolean isExplicitSubNormalizer = perRetrieverNormalizer != null
256+
&& perRetrieverNormalizer.equals(DEFAULT_NORMALIZER) == false;
257+
boolean isMismatch = isExplicitSubNormalizer && perRetrieverNormalizer.equals(normalizer) == false;
233258
if (isMismatch) {
234259
validationException = addValidationError(
235260
String.format(
@@ -383,10 +408,26 @@ protected RetrieverBuilder doRewrite(QueryRewriteContext ctx) {
383408
}
384409
if (normalizer != null) {
385410
ScoreNormalizer[] newNormalizers = new ScoreNormalizer[normalizers.length];
386-
for (int i = 0; i < normalizers.length; i++) {
387-
newNormalizers[i] = (normalizers[i] == null || normalizers[i].equals(DEFAULT_NORMALIZER)) ? normalizer : normalizers[i];
388-
}
389-
return new LinearRetrieverBuilder(innerRetrievers, fields, query, null, rankWindowSize, weights, newNormalizers);
411+
Arrays.fill(newNormalizers, normalizer);
412+
rewritten = new LinearRetrieverBuilder(
413+
rewritten.innerRetrievers,
414+
rewritten.fields,
415+
rewritten.query,
416+
null,
417+
rewritten.rankWindowSize,
418+
rewritten.weights,
419+
newNormalizers
420+
);
421+
} else {
422+
rewritten = new LinearRetrieverBuilder(
423+
rewritten.innerRetrievers,
424+
rewritten.fields,
425+
rewritten.query,
426+
rewritten.normalizer,
427+
rewritten.rankWindowSize,
428+
rewritten.weights,
429+
rewritten.normalizers
430+
);
390431
}
391432

392433
return rewritten;
@@ -413,7 +454,7 @@ public void doToXContent(XContentBuilder builder, Params params) throws IOExcept
413454
builder.startObject();
414455
builder.field(LinearRetrieverComponent.RETRIEVER_FIELD.getPreferredName(), entry.retriever());
415456
builder.field(LinearRetrieverComponent.WEIGHT_FIELD.getPreferredName(), weights[index]);
416-
if (normalizers[index] != null && !normalizers[index].equals(DEFAULT_NORMALIZER)) {
457+
if (normalizers[index] != null && normalizers[index].equals(DEFAULT_NORMALIZER) == false) {
417458
builder.field(LinearRetrieverComponent.NORMALIZER_FIELD.getPreferredName(), normalizers[index].getName());
418459
}
419460
builder.endObject();
@@ -432,7 +473,7 @@ public void doToXContent(XContentBuilder builder, Params params) throws IOExcept
432473
if (query != null) {
433474
builder.field(QUERY_FIELD.getPreferredName(), query);
434475
}
435-
if (normalizer != null && !normalizer.equals(DEFAULT_NORMALIZER)) {
476+
if (normalizer != null && normalizer.equals(DEFAULT_NORMALIZER) == false) {
436477
builder.field(NORMALIZER_FIELD.getPreferredName(), normalizer.getName());
437478
}
438479

x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ public LinearRetrieverComponent(RetrieverBuilder retrieverBuilder, Float weight,
3838
assert retrieverBuilder != null;
3939
this.retriever = retrieverBuilder;
4040
this.weight = weight == null ? DEFAULT_WEIGHT : weight;
41-
this.normalizer = normalizer == null ? DEFAULT_NORMALIZER : normalizer;
41+
this.normalizer = normalizer;
4242
if (this.weight < 0) {
4343
throw new IllegalArgumentException("[weight] must be non-negative");
4444
}
@@ -48,7 +48,7 @@ public LinearRetrieverComponent(RetrieverBuilder retrieverBuilder, Float weight,
4848
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
4949
builder.field(RETRIEVER_FIELD.getPreferredName(), retriever);
5050
builder.field(WEIGHT_FIELD.getPreferredName(), weight);
51-
if (normalizer != null && !normalizer.equals(DEFAULT_NORMALIZER)) {
51+
if (normalizer != null && normalizer.equals(DEFAULT_NORMALIZER) == false) {
5252
builder.field(NORMALIZER_FIELD.getPreferredName(), normalizer.getName());
5353
}
5454
return builder;

x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1333,3 +1333,120 @@ setup:
13331333
- match: { hits.total.value: 1 }
13341334
- match: { hits.hits.0._id: "1" }
13351335
- close_to: { hits.hits.0._score: { value: 1.0, error: 0.001} }
1336+
1337+
---
1338+
"top level normalizer should apply to all sub-retrievers":
1339+
- do:
1340+
search:
1341+
index: test
1342+
body:
1343+
retriever:
1344+
linear:
1345+
normalizer: "minmax"
1346+
retrievers: [
1347+
{
1348+
retriever: {
1349+
standard: {
1350+
query: {
1351+
bool: {
1352+
should: [
1353+
{ constant_score: { filter: { term: { keyword: { value: "one" } } }, boost: 10.0 } },
1354+
{ constant_score: { filter: { term: { keyword: { value: "two" } } }, boost: 9.0 } },
1355+
{ constant_score: { filter: { term: { keyword: { value: "three" } } }, boost: 5.0 } }
1356+
]
1357+
}
1358+
}
1359+
}
1360+
},
1361+
weight: 10.0
1362+
},
1363+
{
1364+
retriever: {
1365+
knn: {
1366+
field: "vector",
1367+
query_vector: [ 4 ],
1368+
k: 1,
1369+
num_candidates: 1
1370+
}
1371+
},
1372+
weight: 2.0
1373+
}
1374+
]
1375+
1376+
- match: { hits.total.value: 4 }
1377+
- match: { hits.hits.0._id: "1" }
1378+
- close_to: { hits.hits.0._score: { value: 10.0, error: 0.001} }
1379+
- match: { hits.hits.1._id: "2" }
1380+
- close_to: { hits.hits.1._score: { value: 8.0, error: 0.001} }
1381+
- match: { hits.hits.2._id: "4" }
1382+
- close_to: { hits.hits.2._score: { value: 2.0, error: 0.001} }
1383+
- match: { hits.hits.3._id: "3" }
1384+
- close_to: { hits.hits.3._score: { value: 0.0, error: 0.001 } }
1385+
1386+
---
1387+
"should throw on different normalizers":
1388+
- do:
1389+
catch: /top-level normalizer \[minmax\] is specified and it should be the same as all sub-retriever normalizers/
1390+
search:
1391+
index: test
1392+
body:
1393+
retriever:
1394+
linear:
1395+
normalizer: "minmax"
1396+
retrievers: [
1397+
{
1398+
retriever: {
1399+
standard: {
1400+
query: {
1401+
constant_score: {
1402+
filter: {
1403+
term: {
1404+
keyword: {
1405+
value: "one"
1406+
}
1407+
}
1408+
},
1409+
boost: 10.0
1410+
}
1411+
}
1412+
}
1413+
},
1414+
weight: 1.0,
1415+
normalizer: "l2_norm"
1416+
}
1417+
]
1418+
1419+
---
1420+
"should not throw on same normalizers":
1421+
- do:
1422+
search:
1423+
index: test
1424+
body:
1425+
retriever:
1426+
linear:
1427+
normalizer: "minmax"
1428+
retrievers: [
1429+
{
1430+
retriever: {
1431+
standard: {
1432+
query: {
1433+
constant_score: {
1434+
filter: {
1435+
term: {
1436+
keyword: {
1437+
value: "one"
1438+
}
1439+
}
1440+
},
1441+
boost: 10.0
1442+
}
1443+
}
1444+
}
1445+
},
1446+
weight: 1.0,
1447+
normalizer: "minmax"
1448+
}
1449+
]
1450+
- match: { hits.total.value: 1 }
1451+
- match: { hits.hits.0._id: "1" }
1452+
- close_to: { hits.hits.0._score: { value: 1.0, error: 0.001} }

0 commit comments

Comments
 (0)