Skip to content

Commit 3d11035

Browse files
authored
Fix concurrency issue in ScriptSortBuilder (elastic#123757) (elastic#124513)
Inter-segment concurrency is disabled whenever sort by field, included script sorting, is used in a search request. The reason why sort by field does not use concurrency is that there are some performance implications, given that the hit queue in Lucene is build per slice and the different search threads don't share information about the documents they have already visited etc. The reason why script sort has concurrency disabled is that the script sorting implementation is not thread safe. This commit addresses such concurrency issue and re-enables search concurrency for search requests that use script sorting. In addition, missing tests are added to cover for sort scripts that rely on _score being available and top_hits aggregation with a scripted sort clause.
1 parent 63e9931 commit 3d11035

File tree

9 files changed

+262
-35
lines changed

9 files changed

+262
-35
lines changed

docs/changelog/123757.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 123757
2+
summary: Fix concurrency issue in `ScriptSortBuilder`
3+
area: Search
4+
type: bug
5+
issues: []

modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/30_search.yml

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,3 +482,118 @@
482482
}]
483483
- match: { error.root_cause.0.type: "illegal_argument_exception" }
484484
- match: { error.root_cause.0.reason: "script score function must not produce negative scores, but got: [-9.0]"}
485+
486+
---
487+
488+
"Script Sort + _score":
489+
- do:
490+
index:
491+
index: test
492+
id: "1"
493+
body: { "test": "a", "num1": 1.0, "type" : "first" }
494+
- do:
495+
index:
496+
index: test
497+
id: "2"
498+
body: { "test": "b", "num1": 2.0, "type" : "first" }
499+
- do:
500+
index:
501+
index: test
502+
id: "3"
503+
body: { "test": "c", "num1": 3.0, "type" : "first" }
504+
- do:
505+
index:
506+
index: test
507+
id: "4"
508+
body: { "test": "d", "num1": 4.0, "type" : "second" }
509+
- do:
510+
index:
511+
index: test
512+
id: "5"
513+
body: { "test": "e", "num1": 5.0, "type" : "second" }
514+
- do:
515+
indices.refresh: {}
516+
517+
- do:
518+
search:
519+
rest_total_hits_as_int: true
520+
index: test
521+
body:
522+
sort: [
523+
{
524+
_script: {
525+
script: {
526+
lang: "painless",
527+
source: "doc['num1'].value + _score"
528+
},
529+
type: "number"
530+
}
531+
}
532+
]
533+
534+
- match: { hits.total: 5 }
535+
- match: { hits.hits.0.sort.0: 2.0 }
536+
- match: { hits.hits.1.sort.0: 3.0 }
537+
- match: { hits.hits.2.sort.0: 4.0 }
538+
- match: { hits.hits.3.sort.0: 5.0 }
539+
- match: { hits.hits.4.sort.0: 6.0 }
540+
541+
- do:
542+
search:
543+
rest_total_hits_as_int: true
544+
index: test
545+
body:
546+
sort: [
547+
{
548+
_script: {
549+
script: {
550+
lang: "painless",
551+
source: "doc['test.keyword'].value + '-' + _score"
552+
},
553+
type: "string"
554+
}
555+
}
556+
]
557+
558+
- match: { hits.total: 5 }
559+
- match: { hits.hits.0.sort.0: "a-1.0" }
560+
- match: { hits.hits.1.sort.0: "b-1.0" }
561+
- match: { hits.hits.2.sort.0: "c-1.0" }
562+
- match: { hits.hits.3.sort.0: "d-1.0" }
563+
- match: { hits.hits.4.sort.0: "e-1.0" }
564+
565+
- do:
566+
search:
567+
rest_total_hits_as_int: true
568+
index: test
569+
body:
570+
aggs:
571+
test:
572+
terms:
573+
field: type.keyword
574+
aggs:
575+
top_hits:
576+
top_hits:
577+
sort: [
578+
{
579+
_script: {
580+
script: {
581+
lang: "painless",
582+
source: "doc['test.keyword'].value + '-' + _score"
583+
},
584+
type: "string"
585+
}
586+
},
587+
"_score"
588+
]
589+
size: 1
590+
591+
- match: { hits.total: 5 }
592+
- match: { aggregations.test.buckets.0.key: "first" }
593+
- match: { aggregations.test.buckets.0.top_hits.hits.total: 3 }
594+
- match: { aggregations.test.buckets.0.top_hits.hits.hits.0.sort.0: "a-1.0" }
595+
- match: { aggregations.test.buckets.0.top_hits.hits.hits.0.sort.1: 1.0 }
596+
- match: { aggregations.test.buckets.1.key: "second" }
597+
- match: { aggregations.test.buckets.1.top_hits.hits.total: 2 }
598+
- match: { aggregations.test.buckets.1.top_hits.hits.hits.0.sort.0: "d-1.0" }
599+
- match: { aggregations.test.buckets.1.top_hits.hits.hits.0.sort.1: 1.0 }

server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/metrics/TopHitsIT.java

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import org.elasticsearch.common.document.DocumentField;
1919
import org.elasticsearch.common.settings.Settings;
2020
import org.elasticsearch.index.IndexSettings;
21+
import org.elasticsearch.index.fielddata.ScriptDocValues;
2122
import org.elasticsearch.index.query.MatchAllQueryBuilder;
2223
import org.elasticsearch.index.query.QueryBuilders;
2324
import org.elasticsearch.index.seqno.SequenceNumbers;
@@ -64,6 +65,7 @@
6465
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
6566
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
6667
import static org.elasticsearch.index.query.QueryBuilders.nestedQuery;
68+
import static org.elasticsearch.script.MockScriptPlugin.NAME;
6769
import static org.elasticsearch.search.aggregations.AggregationBuilders.global;
6870
import static org.elasticsearch.search.aggregations.AggregationBuilders.histogram;
6971
import static org.elasticsearch.search.aggregations.AggregationBuilders.max;
@@ -102,7 +104,12 @@ protected Collection<Class<? extends Plugin>> nodePlugins() {
102104
public static class CustomScriptPlugin extends MockScriptPlugin {
103105
@Override
104106
protected Map<String, Function<Map<String, Object>, Object>> pluginScripts() {
105-
return Collections.singletonMap("5", script -> "5");
107+
return Map.of("5", script -> "5", "doc['sort'].value", CustomScriptPlugin::sortDoubleScript);
108+
}
109+
110+
private static Double sortDoubleScript(Map<String, Object> vars) {
111+
Map<?, ?> doc = (Map) vars.get("doc");
112+
return ((Number) ((ScriptDocValues<?>) doc.get("sort")).get(0)).doubleValue();
106113
}
107114

108115
@Override
@@ -1268,6 +1275,41 @@ public void testWithRescore() {
12681275
);
12691276
}
12701277

1278+
public void testScriptSorting() {
1279+
Script script = new Script(ScriptType.INLINE, NAME, "doc['sort'].value", Collections.emptyMap());
1280+
assertNoFailuresAndResponse(
1281+
prepareSearch("idx").addAggregation(
1282+
terms("terms").executionHint(randomExecutionHint())
1283+
.field(TERMS_AGGS_FIELD)
1284+
.subAggregation(topHits("hits").sort(SortBuilders.scriptSort(script, ScriptSortType.NUMBER).order(SortOrder.DESC)))
1285+
),
1286+
response -> {
1287+
Terms terms = response.getAggregations().get("terms");
1288+
assertThat(terms, notNullValue());
1289+
assertThat(terms.getName(), equalTo("terms"));
1290+
assertThat(terms.getBuckets().size(), equalTo(5));
1291+
1292+
double higestSortValue = 0;
1293+
for (int i = 0; i < 5; i++) {
1294+
Terms.Bucket bucket = terms.getBucketByKey("val" + i);
1295+
assertThat(bucket, notNullValue());
1296+
assertThat(key(bucket), equalTo("val" + i));
1297+
assertThat(bucket.getDocCount(), equalTo(10L));
1298+
TopHits topHits = bucket.getAggregations().get("hits");
1299+
SearchHits hits = topHits.getHits();
1300+
assertThat(hits.getTotalHits().value(), equalTo(10L));
1301+
assertThat(hits.getHits().length, equalTo(3));
1302+
higestSortValue += 10;
1303+
assertThat((Double) hits.getAt(0).getSortValues()[0], equalTo(higestSortValue));
1304+
assertThat((Double) hits.getAt(1).getSortValues()[0], equalTo(higestSortValue - 1));
1305+
assertThat((Double) hits.getAt(2).getSortValues()[0], equalTo(higestSortValue - 2));
1306+
1307+
assertThat(hits.getAt(0).getSourceAsMap().size(), equalTo(5));
1308+
}
1309+
}
1310+
);
1311+
}
1312+
12711313
public static class FetchPlugin extends Plugin implements SearchPlugin {
12721314
@Override
12731315
public List<FetchSubPhase> getFetchSubPhases(FetchPhaseConstructionContext context) {

server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -87,21 +87,21 @@ public static class CustomScriptPlugin extends MockScriptPlugin {
8787
@Override
8888
protected Map<String, Function<Map<String, Object>, Object>> pluginScripts() {
8989
Map<String, Function<Map<String, Object>, Object>> scripts = new HashMap<>();
90-
scripts.put("doc['number'].value", vars -> sortDoubleScript(vars));
91-
scripts.put("doc['keyword'].value", vars -> sortStringScript(vars));
90+
scripts.put("doc['number'].value", CustomScriptPlugin::sortDoubleScript);
91+
scripts.put("doc['keyword'].value", CustomScriptPlugin::sortStringScript);
9292
return scripts;
9393
}
9494

95-
static Double sortDoubleScript(Map<String, Object> vars) {
95+
private static Double sortDoubleScript(Map<String, Object> vars) {
9696
Map<?, ?> doc = (Map) vars.get("doc");
97-
Double index = ((Number) ((ScriptDocValues<?>) doc.get("number")).get(0)).doubleValue();
98-
return index;
97+
Double score = (Double) vars.get("_score");
98+
return ((Number) ((ScriptDocValues<?>) doc.get("number")).get(0)).doubleValue() + score;
9999
}
100100

101-
static String sortStringScript(Map<String, Object> vars) {
101+
private static String sortStringScript(Map<String, Object> vars) {
102102
Map<?, ?> doc = (Map) vars.get("doc");
103-
String value = ((String) ((ScriptDocValues<?>) doc.get("keyword")).get(0));
104-
return value;
103+
Double score = (Double) vars.get("_score");
104+
return ((ScriptDocValues<?>) doc.get("keyword")).get(0) + ",_score=" + score;
105105
}
106106
}
107107

@@ -1665,14 +1665,14 @@ public void testCustomFormat() throws Exception {
16651665
);
16661666
}
16671667

1668-
public void testScriptFieldSort() throws Exception {
1668+
public void testScriptFieldSort() {
16691669
assertAcked(prepareCreate("test").setMapping("keyword", "type=keyword", "number", "type=integer"));
16701670
ensureGreen();
16711671
final int numDocs = randomIntBetween(10, 20);
16721672
IndexRequestBuilder[] indexReqs = new IndexRequestBuilder[numDocs];
16731673
List<String> keywords = new ArrayList<>();
16741674
for (int i = 0; i < numDocs; ++i) {
1675-
indexReqs[i] = prepareIndex("test").setSource("number", i, "keyword", Integer.toString(i));
1675+
indexReqs[i] = prepareIndex("test").setSource("number", i, "keyword", Integer.toString(i), "version", i + "." + i);
16761676
keywords.add(Integer.toString(i));
16771677
}
16781678
Collections.sort(keywords);
@@ -1686,7 +1686,7 @@ public void testScriptFieldSort() throws Exception {
16861686
.addSort(SortBuilders.scriptSort(script, ScriptSortBuilder.ScriptSortType.NUMBER))
16871687
.addSort(SortBuilders.scoreSort()),
16881688
response -> {
1689-
double expectedValue = 0;
1689+
double expectedValue = 1; // start from 1 because it includes _score, 1.0f for all docs
16901690
for (SearchHit hit : response.getHits()) {
16911691
assertThat(hit.getSortValues().length, equalTo(2));
16921692
assertThat(hit.getSortValues()[0], equalTo(expectedValue++));
@@ -1707,7 +1707,7 @@ public void testScriptFieldSort() throws Exception {
17071707
int expectedValue = 0;
17081708
for (SearchHit hit : response.getHits()) {
17091709
assertThat(hit.getSortValues().length, equalTo(2));
1710-
assertThat(hit.getSortValues()[0], equalTo(keywords.get(expectedValue++)));
1710+
assertThat(hit.getSortValues()[0], equalTo(keywords.get(expectedValue++) + ",_score=1.0"));
17111711
assertThat(hit.getSortValues()[1], equalTo(1f));
17121712
}
17131713
}

server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefFieldComparatorSource.java

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.apache.lucene.index.SortedSetDocValues;
1616
import org.apache.lucene.search.DocIdSetIterator;
1717
import org.apache.lucene.search.FieldComparator;
18+
import org.apache.lucene.search.LeafFieldComparator;
1819
import org.apache.lucene.search.Pruning;
1920
import org.apache.lucene.search.Scorable;
2021
import org.apache.lucene.search.SortField;
@@ -67,7 +68,7 @@ protected SortedBinaryDocValues getValues(LeafReaderContext context) throws IOEx
6768
return indexFieldData.load(context).getBytesValues();
6869
}
6970

70-
protected void setScorer(Scorable scorer) {}
71+
protected void setScorer(LeafReaderContext context, Scorable scorer) {}
7172

7273
@Override
7374
public FieldComparator<?> newComparator(String fieldname, int numHits, Pruning enableSkipping, boolean reversed) {
@@ -120,10 +121,43 @@ protected BinaryDocValues getBinaryDocValues(LeafReaderContext context, String f
120121
}
121122

122123
@Override
123-
public void setScorer(Scorable scorer) {
124-
BytesRefFieldComparatorSource.this.setScorer(scorer);
125-
}
124+
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
125+
LeafFieldComparator leafComparator = super.getLeafComparator(context);
126+
// TopFieldCollector interacts with inter-segment concurrency by creating a FieldValueHitQueue per slice, each one with a
127+
// specific instance of the FieldComparator. This ensures sequential execution across LeafFieldComparators returned by
128+
// the same parent FieldComparator. That allows for effectively sharing the same instance of leaf comparator, like in this
129+
// case in the Lucene code. That's fine dealing with sorting by field, but not when using script sorting, because we then
130+
// need to set to Scorer to the specific leaf comparator, to make the _score variable available in sort scripts. The
131+
// setScorer call happens concurrently across slices and needs to target the specific leaf context that is being searched.
132+
return new LeafFieldComparator() {
133+
@Override
134+
public void setBottom(int slot) throws IOException {
135+
leafComparator.setBottom(slot);
136+
}
137+
138+
@Override
139+
public int compareBottom(int doc) throws IOException {
140+
return leafComparator.compareBottom(doc);
141+
}
142+
143+
@Override
144+
public int compareTop(int doc) throws IOException {
145+
return leafComparator.compareTop(doc);
146+
}
147+
148+
@Override
149+
public void copy(int slot, int doc) throws IOException {
150+
leafComparator.copy(slot, doc);
151+
}
126152

153+
@Override
154+
public void setScorer(Scorable scorer) {
155+
// this ensures that the scorer is set for the specific leaf comparator
156+
// corresponding to the leaf context we are scoring
157+
BytesRefFieldComparatorSource.this.setScorer(context, scorer);
158+
}
159+
};
160+
}
127161
};
128162
}
129163

server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/DoubleValuesComparatorSource.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ private NumericDoubleValues getNumericDocValues(LeafReaderContext context, doubl
7171
}
7272
}
7373

74-
protected void setScorer(Scorable scorer) {}
74+
protected void setScorer(LeafReaderContext context, Scorable scorer) {}
7575

7676
@Override
7777
public FieldComparator<?> newComparator(String fieldname, int numHits, Pruning enableSkipping, boolean reversed) {
@@ -91,7 +91,7 @@ protected NumericDocValues getNumericDocValues(LeafReaderContext context, String
9191

9292
@Override
9393
public void setScorer(Scorable scorer) {
94-
DoubleValuesComparatorSource.this.setScorer(scorer);
94+
DoubleValuesComparatorSource.this.setScorer(context, scorer);
9595
}
9696
};
9797
}

0 commit comments

Comments
 (0)