Skip to content

Commit 255f7fe

Browse files
authored
[8.18] Fix concurrency issue in ScriptSortBuilder (elastic#123757) (elastic#124514)
* Fix concurrency issue in ScriptSortBuilder (elastic#123757) Inter-segment concurrency is disabled whenever sort by field, included script sorting, is used in a search request. The reason why sort by field does not use concurrency is that there are some performance implications, given that the hit queue in Lucene is build per slice and the different search threads don't share information about the documents they have already visited etc. The reason why script sort has concurrency disabled is that the script sorting implementation is not thread safe. This commit addresses such concurrency issue and re-enables search concurrency for search requests that use script sorting. In addition, missing tests are added to cover for sort scripts that rely on _score being available and top_hits aggregation with a scripted sort clause. * iter
1 parent f790cb8 commit 255f7fe

File tree

9 files changed

+262
-35
lines changed

9 files changed

+262
-35
lines changed

docs/changelog/123757.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 123757
2+
summary: Fix concurrency issue in `ScriptSortBuilder`
3+
area: Search
4+
type: bug
5+
issues: []

modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/30_search.yml

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,3 +482,118 @@
482482
}]
483483
- match: { error.root_cause.0.type: "illegal_argument_exception" }
484484
- match: { error.root_cause.0.reason: "script score function must not produce negative scores, but got: [-9.0]"}
485+
486+
---
487+
488+
"Script Sort + _score":
489+
- do:
490+
index:
491+
index: test
492+
id: "1"
493+
body: { "test": "a", "num1": 1.0, "type" : "first" }
494+
- do:
495+
index:
496+
index: test
497+
id: "2"
498+
body: { "test": "b", "num1": 2.0, "type" : "first" }
499+
- do:
500+
index:
501+
index: test
502+
id: "3"
503+
body: { "test": "c", "num1": 3.0, "type" : "first" }
504+
- do:
505+
index:
506+
index: test
507+
id: "4"
508+
body: { "test": "d", "num1": 4.0, "type" : "second" }
509+
- do:
510+
index:
511+
index: test
512+
id: "5"
513+
body: { "test": "e", "num1": 5.0, "type" : "second" }
514+
- do:
515+
indices.refresh: {}
516+
517+
- do:
518+
search:
519+
rest_total_hits_as_int: true
520+
index: test
521+
body:
522+
sort: [
523+
{
524+
_script: {
525+
script: {
526+
lang: "painless",
527+
source: "doc['num1'].value + _score"
528+
},
529+
type: "number"
530+
}
531+
}
532+
]
533+
534+
- match: { hits.total: 5 }
535+
- match: { hits.hits.0.sort.0: 2.0 }
536+
- match: { hits.hits.1.sort.0: 3.0 }
537+
- match: { hits.hits.2.sort.0: 4.0 }
538+
- match: { hits.hits.3.sort.0: 5.0 }
539+
- match: { hits.hits.4.sort.0: 6.0 }
540+
541+
- do:
542+
search:
543+
rest_total_hits_as_int: true
544+
index: test
545+
body:
546+
sort: [
547+
{
548+
_script: {
549+
script: {
550+
lang: "painless",
551+
source: "doc['test.keyword'].value + '-' + _score"
552+
},
553+
type: "string"
554+
}
555+
}
556+
]
557+
558+
- match: { hits.total: 5 }
559+
- match: { hits.hits.0.sort.0: "a-1.0" }
560+
- match: { hits.hits.1.sort.0: "b-1.0" }
561+
- match: { hits.hits.2.sort.0: "c-1.0" }
562+
- match: { hits.hits.3.sort.0: "d-1.0" }
563+
- match: { hits.hits.4.sort.0: "e-1.0" }
564+
565+
- do:
566+
search:
567+
rest_total_hits_as_int: true
568+
index: test
569+
body:
570+
aggs:
571+
test:
572+
terms:
573+
field: type.keyword
574+
aggs:
575+
top_hits:
576+
top_hits:
577+
sort: [
578+
{
579+
_script: {
580+
script: {
581+
lang: "painless",
582+
source: "doc['test.keyword'].value + '-' + _score"
583+
},
584+
type: "string"
585+
}
586+
},
587+
"_score"
588+
]
589+
size: 1
590+
591+
- match: { hits.total: 5 }
592+
- match: { aggregations.test.buckets.0.key: "first" }
593+
- match: { aggregations.test.buckets.0.top_hits.hits.total: 3 }
594+
- match: { aggregations.test.buckets.0.top_hits.hits.hits.0.sort.0: "a-1.0" }
595+
- match: { aggregations.test.buckets.0.top_hits.hits.hits.0.sort.1: 1.0 }
596+
- match: { aggregations.test.buckets.1.key: "second" }
597+
- match: { aggregations.test.buckets.1.top_hits.hits.total: 2 }
598+
- match: { aggregations.test.buckets.1.top_hits.hits.hits.0.sort.0: "d-1.0" }
599+
- match: { aggregations.test.buckets.1.top_hits.hits.hits.0.sort.1: 1.0 }

server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/metrics/TopHitsIT.java

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import org.elasticsearch.common.document.DocumentField;
1919
import org.elasticsearch.common.settings.Settings;
2020
import org.elasticsearch.index.IndexSettings;
21+
import org.elasticsearch.index.fielddata.ScriptDocValues;
2122
import org.elasticsearch.index.query.MatchAllQueryBuilder;
2223
import org.elasticsearch.index.query.QueryBuilders;
2324
import org.elasticsearch.index.seqno.SequenceNumbers;
@@ -64,6 +65,7 @@
6465
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
6566
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
6667
import static org.elasticsearch.index.query.QueryBuilders.nestedQuery;
68+
import static org.elasticsearch.script.MockScriptPlugin.NAME;
6769
import static org.elasticsearch.search.aggregations.AggregationBuilders.global;
6870
import static org.elasticsearch.search.aggregations.AggregationBuilders.histogram;
6971
import static org.elasticsearch.search.aggregations.AggregationBuilders.max;
@@ -102,7 +104,12 @@ protected Collection<Class<? extends Plugin>> nodePlugins() {
102104
public static class CustomScriptPlugin extends MockScriptPlugin {
103105
@Override
104106
protected Map<String, Function<Map<String, Object>, Object>> pluginScripts() {
105-
return Collections.singletonMap("5", script -> "5");
107+
return Map.of("5", script -> "5", "doc['sort'].value", CustomScriptPlugin::sortDoubleScript);
108+
}
109+
110+
private static Double sortDoubleScript(Map<String, Object> vars) {
111+
Map<?, ?> doc = (Map) vars.get("doc");
112+
return ((Number) ((ScriptDocValues<?>) doc.get("sort")).get(0)).doubleValue();
106113
}
107114

108115
@Override
@@ -1268,6 +1275,41 @@ public void testWithRescore() {
12681275
);
12691276
}
12701277

1278+
public void testScriptSorting() {
1279+
Script script = new Script(ScriptType.INLINE, NAME, "doc['sort'].value", Collections.emptyMap());
1280+
assertNoFailuresAndResponse(
1281+
prepareSearch("idx").addAggregation(
1282+
terms("terms").executionHint(randomExecutionHint())
1283+
.field(TERMS_AGGS_FIELD)
1284+
.subAggregation(topHits("hits").sort(SortBuilders.scriptSort(script, ScriptSortType.NUMBER).order(SortOrder.DESC)))
1285+
),
1286+
response -> {
1287+
Terms terms = response.getAggregations().get("terms");
1288+
assertThat(terms, notNullValue());
1289+
assertThat(terms.getName(), equalTo("terms"));
1290+
assertThat(terms.getBuckets().size(), equalTo(5));
1291+
1292+
double higestSortValue = 0;
1293+
for (int i = 0; i < 5; i++) {
1294+
Terms.Bucket bucket = terms.getBucketByKey("val" + i);
1295+
assertThat(bucket, notNullValue());
1296+
assertThat(key(bucket), equalTo("val" + i));
1297+
assertThat(bucket.getDocCount(), equalTo(10L));
1298+
TopHits topHits = bucket.getAggregations().get("hits");
1299+
SearchHits hits = topHits.getHits();
1300+
assertThat(hits.getTotalHits().value, equalTo(10L));
1301+
assertThat(hits.getHits().length, equalTo(3));
1302+
higestSortValue += 10;
1303+
assertThat((Double) hits.getAt(0).getSortValues()[0], equalTo(higestSortValue));
1304+
assertThat((Double) hits.getAt(1).getSortValues()[0], equalTo(higestSortValue - 1));
1305+
assertThat((Double) hits.getAt(2).getSortValues()[0], equalTo(higestSortValue - 2));
1306+
1307+
assertThat(hits.getAt(0).getSourceAsMap().size(), equalTo(5));
1308+
}
1309+
}
1310+
);
1311+
}
1312+
12711313
public static class FetchPlugin extends Plugin implements SearchPlugin {
12721314
@Override
12731315
public List<FetchSubPhase> getFetchSubPhases(FetchPhaseConstructionContext context) {

server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -85,21 +85,21 @@ public static class CustomScriptPlugin extends MockScriptPlugin {
8585
@Override
8686
protected Map<String, Function<Map<String, Object>, Object>> pluginScripts() {
8787
Map<String, Function<Map<String, Object>, Object>> scripts = new HashMap<>();
88-
scripts.put("doc['number'].value", vars -> sortDoubleScript(vars));
89-
scripts.put("doc['keyword'].value", vars -> sortStringScript(vars));
88+
scripts.put("doc['number'].value", CustomScriptPlugin::sortDoubleScript);
89+
scripts.put("doc['keyword'].value", CustomScriptPlugin::sortStringScript);
9090
return scripts;
9191
}
9292

93-
static Double sortDoubleScript(Map<String, Object> vars) {
93+
private static Double sortDoubleScript(Map<String, Object> vars) {
9494
Map<?, ?> doc = (Map) vars.get("doc");
95-
Double index = ((Number) ((ScriptDocValues<?>) doc.get("number")).get(0)).doubleValue();
96-
return index;
95+
Double score = (Double) vars.get("_score");
96+
return ((Number) ((ScriptDocValues<?>) doc.get("number")).get(0)).doubleValue() + score;
9797
}
9898

99-
static String sortStringScript(Map<String, Object> vars) {
99+
private static String sortStringScript(Map<String, Object> vars) {
100100
Map<?, ?> doc = (Map) vars.get("doc");
101-
String value = ((String) ((ScriptDocValues<?>) doc.get("keyword")).get(0));
102-
return value;
101+
Double score = (Double) vars.get("_score");
102+
return ((ScriptDocValues<?>) doc.get("keyword")).get(0) + ",_score=" + score;
103103
}
104104
}
105105

@@ -1763,14 +1763,14 @@ public void testCustomFormat() throws Exception {
17631763
);
17641764
}
17651765

1766-
public void testScriptFieldSort() throws Exception {
1766+
public void testScriptFieldSort() {
17671767
assertAcked(prepareCreate("test").setMapping("keyword", "type=keyword", "number", "type=integer"));
17681768
ensureGreen();
17691769
final int numDocs = randomIntBetween(10, 20);
17701770
IndexRequestBuilder[] indexReqs = new IndexRequestBuilder[numDocs];
17711771
List<String> keywords = new ArrayList<>();
17721772
for (int i = 0; i < numDocs; ++i) {
1773-
indexReqs[i] = prepareIndex("test").setSource("number", i, "keyword", Integer.toString(i));
1773+
indexReqs[i] = prepareIndex("test").setSource("number", i, "keyword", Integer.toString(i), "version", i + "." + i);
17741774
keywords.add(Integer.toString(i));
17751775
}
17761776
Collections.sort(keywords);
@@ -1784,7 +1784,7 @@ public void testScriptFieldSort() throws Exception {
17841784
.addSort(SortBuilders.scriptSort(script, ScriptSortBuilder.ScriptSortType.NUMBER))
17851785
.addSort(SortBuilders.scoreSort()),
17861786
response -> {
1787-
double expectedValue = 0;
1787+
double expectedValue = 1; // start from 1 because it includes _score, 1.0f for all docs
17881788
for (SearchHit hit : response.getHits()) {
17891789
assertThat(hit.getSortValues().length, equalTo(2));
17901790
assertThat(hit.getSortValues()[0], equalTo(expectedValue++));
@@ -1805,7 +1805,7 @@ public void testScriptFieldSort() throws Exception {
18051805
int expectedValue = 0;
18061806
for (SearchHit hit : response.getHits()) {
18071807
assertThat(hit.getSortValues().length, equalTo(2));
1808-
assertThat(hit.getSortValues()[0], equalTo(keywords.get(expectedValue++)));
1808+
assertThat(hit.getSortValues()[0], equalTo(keywords.get(expectedValue++) + ",_score=1.0"));
18091809
assertThat(hit.getSortValues()[1], equalTo(1f));
18101810
}
18111811
}

server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefFieldComparatorSource.java

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.apache.lucene.index.SortedSetDocValues;
1616
import org.apache.lucene.search.DocIdSetIterator;
1717
import org.apache.lucene.search.FieldComparator;
18+
import org.apache.lucene.search.LeafFieldComparator;
1819
import org.apache.lucene.search.Pruning;
1920
import org.apache.lucene.search.Scorable;
2021
import org.apache.lucene.search.SortField;
@@ -67,7 +68,7 @@ protected SortedBinaryDocValues getValues(LeafReaderContext context) throws IOEx
6768
return indexFieldData.load(context).getBytesValues();
6869
}
6970

70-
protected void setScorer(Scorable scorer) {}
71+
protected void setScorer(LeafReaderContext context, Scorable scorer) {}
7172

7273
@Override
7374
public FieldComparator<?> newComparator(String fieldname, int numHits, Pruning enableSkipping, boolean reversed) {
@@ -120,10 +121,43 @@ protected BinaryDocValues getBinaryDocValues(LeafReaderContext context, String f
120121
}
121122

122123
@Override
123-
public void setScorer(Scorable scorer) {
124-
BytesRefFieldComparatorSource.this.setScorer(scorer);
125-
}
124+
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
125+
LeafFieldComparator leafComparator = super.getLeafComparator(context);
126+
// TopFieldCollector interacts with inter-segment concurrency by creating a FieldValueHitQueue per slice, each one with a
127+
// specific instance of the FieldComparator. This ensures sequential execution across LeafFieldComparators returned by
128+
// the same parent FieldComparator. That allows for effectively sharing the same instance of leaf comparator, like in this
129+
// case in the Lucene code. That's fine dealing with sorting by field, but not when using script sorting, because we then
130+
// need to set to Scorer to the specific leaf comparator, to make the _score variable available in sort scripts. The
131+
// setScorer call happens concurrently across slices and needs to target the specific leaf context that is being searched.
132+
return new LeafFieldComparator() {
133+
@Override
134+
public void setBottom(int slot) throws IOException {
135+
leafComparator.setBottom(slot);
136+
}
137+
138+
@Override
139+
public int compareBottom(int doc) throws IOException {
140+
return leafComparator.compareBottom(doc);
141+
}
142+
143+
@Override
144+
public int compareTop(int doc) throws IOException {
145+
return leafComparator.compareTop(doc);
146+
}
147+
148+
@Override
149+
public void copy(int slot, int doc) throws IOException {
150+
leafComparator.copy(slot, doc);
151+
}
126152

153+
@Override
154+
public void setScorer(Scorable scorer) {
155+
// this ensures that the scorer is set for the specific leaf comparator
156+
// corresponding to the leaf context we are scoring
157+
BytesRefFieldComparatorSource.this.setScorer(context, scorer);
158+
}
159+
};
160+
}
127161
};
128162
}
129163

server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/DoubleValuesComparatorSource.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ private NumericDoubleValues getNumericDocValues(LeafReaderContext context, doubl
7171
}
7272
}
7373

74-
protected void setScorer(Scorable scorer) {}
74+
protected void setScorer(LeafReaderContext context, Scorable scorer) {}
7575

7676
@Override
7777
public FieldComparator<?> newComparator(String fieldname, int numHits, Pruning enableSkipping, boolean reversed) {
@@ -91,7 +91,7 @@ protected NumericDocValues getNumericDocValues(LeafReaderContext context, String
9191

9292
@Override
9393
public void setScorer(Scorable scorer) {
94-
DoubleValuesComparatorSource.this.setScorer(scorer);
94+
DoubleValuesComparatorSource.this.setScorer(context, scorer);
9595
}
9696
};
9797
}

0 commit comments

Comments
 (0)