Skip to content

Commit aa99392

Browse files
Support partial sort fields in TopN pushdown (#116043)
* Support partial sort fields in TopN pushdown * Update docs/changelog/116043.yaml * Update docs/changelog/116043.yaml * Refine physical planner tests
1 parent c00abac commit aa99392

File tree

3 files changed

+108
-6
lines changed

3 files changed

+108
-6
lines changed

docs/changelog/116043.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 116043
2+
summary: Support partial sort fields in TopN pushdown
3+
area: ES|QL
4+
type: enhancement
5+
issues:
6+
- 114515

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSource.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,8 +184,7 @@ && canPushDownOrders(topNExec.order(), hasIdenticalDelegate)) {
184184
break;
185185
}
186186
}
187-
// TODO: We can push down partial sorts where `pushableSorts.size() < orders.size()`, but that should involve benchmarks
188-
if (pushableSorts.size() > 0 && pushableSorts.size() == orders.size()) {
187+
if (pushableSorts.isEmpty() == false) {
189188
return new PushableCompoundExec(evalExec, queryExec, pushableSorts);
190189
}
191190
}

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java

Lines changed: 101 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4984,8 +4984,101 @@ public void testPushTopNDistanceWithCompoundFilterToSource() {
49844984
}
49854985

49864986
/**
4987-
* This test shows that with an additional EVAL used in the filter, we can no longer push down the SORT distance.
4988-
* TODO: This could be optimized in future work. Consider moving much of EnableSpatialDistancePushdown into logical planning.
4987+
* Tests that multiple sorts, including distance and a field, are pushed down to the source.
4988+
* <code>
4989+
* ProjectExec[[abbrev{f}#25, name{f}#26, location{f}#29, country{f}#30, city{f}#31, scalerank{f}#27, scale{r}#7]]
4990+
* \_TopNExec[[
4991+
* Order[distance{r}#4,ASC,LAST],
4992+
* Order[scalerank{f}#27,ASC,LAST],
4993+
* Order[scale{r}#7,DESC,FIRST],
4994+
* Order[loc{r}#10,DESC,FIRST]
4995+
* ],5[INTEGER],0]
4996+
* \_ExchangeExec[[abbrev{f}#25, name{f}#26, location{f}#29, country{f}#30, city{f}#31, scalerank{f}#27, scale{r}#7,
4997+
* distance{r}#4, loc{r}#10],false]
4998+
* \_ProjectExec[[abbrev{f}#25, name{f}#26, location{f}#29, country{f}#30, city{f}#31, scalerank{f}#27, scale{r}#7,
4999+
* distance{r}#4, loc{r}#10]]
5000+
* \_FieldExtractExec[abbrev{f}#25, name{f}#26, country{f}#30, city{f}#31][]
5001+
* \_EvalExec[[
5002+
* STDISTANCE(location{f}#29,[1 1 0 0 0 e1 7a 14 ae 47 21 29 40 a0 1a 2f dd 24 d6 4b 40][GEO_POINT]) AS distance,
5003+
* 10[INTEGER] - scalerank{f}#27 AS scale, TOSTRING(location{f}#29) AS loc
5004+
* ]]
5005+
* \_FieldExtractExec[location{f}#29, scalerank{f}#27][]
5006+
* \_EsQueryExec[airports], indexMode[standard], query[{
5007+
* "bool":{
5008+
* "filter":[
5009+
* {"esql_single_value":{"field":"scalerank","next":{...},"source":"scalerank &lt; 6@3:31"}},
5010+
* {"bool":{
5011+
* "must":[
5012+
* {"geo_shape":{"location":{"relation":"INTERSECTS","shape":{...}}}},
5013+
* {"geo_shape":{"location":{"relation":"DISJOINT","shape":{...}}}}
5014+
* ],"boost":1.0}}],"boost":1.0}}][_doc{f}#44], limit[5], sort[[
5015+
* GeoDistanceSort[field=location{f}#29, direction=ASC, lat=55.673, lon=12.565],
5016+
* FieldSort[field=scalerank{f}#27, direction=ASC, nulls=LAST]
5017+
* ]] estimatedRowSize[303]
5018+
* </code>
5019+
*/
5020+
public void testPushTopNDistanceAndPushableFieldWithCompoundFilterToSource() {
5021+
var optimized = optimizedPlan(physicalPlan("""
5022+
FROM airports
5023+
| EVAL distance = ST_DISTANCE(location, TO_GEOPOINT("POINT(12.565 55.673)")), scale = 10 - scalerank, loc = location::string
5024+
| WHERE distance < 500000 AND scalerank < 6 AND distance > 10000
5025+
| SORT distance ASC, scalerank ASC, scale DESC, loc DESC
5026+
| LIMIT 5
5027+
| KEEP abbrev, name, location, country, city, scalerank, scale
5028+
""", airports));
5029+
5030+
var project = as(optimized, ProjectExec.class);
5031+
var topN = as(project.child(), TopNExec.class);
5032+
assertThat(topN.order().size(), is(4));
5033+
var exchange = asRemoteExchange(topN.child());
5034+
5035+
project = as(exchange.child(), ProjectExec.class);
5036+
assertThat(
5037+
names(project.projections()),
5038+
contains("abbrev", "name", "location", "country", "city", "scalerank", "scale", "distance", "loc")
5039+
);
5040+
var extract = as(project.child(), FieldExtractExec.class);
5041+
assertThat(names(extract.attributesToExtract()), contains("abbrev", "name", "country", "city"));
5042+
var evalExec = as(extract.child(), EvalExec.class);
5043+
var alias = as(evalExec.fields().get(0), Alias.class);
5044+
assertThat(alias.name(), is("distance"));
5045+
var stDistance = as(alias.child(), StDistance.class);
5046+
assertThat(stDistance.left().toString(), startsWith("location"));
5047+
extract = as(evalExec.child(), FieldExtractExec.class);
5048+
assertThat(names(extract.attributesToExtract()), contains("location", "scalerank"));
5049+
var source = source(extract.child());
5050+
5051+
// Assert that the leading TopN orders (distance, scalerank) are pushed down as geo-sort(location) followed by field-sort(scalerank)
5052+
assertThat(source.limit(), is(topN.limit()));
5053+
Set<String> orderSet = orderAsSet(topN.order().subList(0, 2));
5054+
Set<String> sortsSet = sortsAsSet(source.sorts(), Map.of("location", "distance"));
5055+
assertThat(orderSet, is(sortsSet));
5056+
5057+
// Fine-grained checks on the pushed down sort
5058+
assertThat(source.limit(), is(l(5)));
5059+
assertThat(source.sorts().size(), is(2));
5060+
EsQueryExec.Sort sort = source.sorts().get(0);
5061+
assertThat(sort.direction(), is(Order.OrderDirection.ASC));
5062+
assertThat(name(sort.field()), is("location"));
5063+
assertThat(sort.sortBuilder(), isA(GeoDistanceSortBuilder.class));
5064+
sort = source.sorts().get(1);
5065+
assertThat(sort.direction(), is(Order.OrderDirection.ASC));
5066+
assertThat(name(sort.field()), is("scalerank"));
5067+
assertThat(sort.sortBuilder(), isA(FieldSortBuilder.class));
5068+
5069+
// Fine-grained checks on the pushed down query
5070+
var bool = as(source.query(), BoolQueryBuilder.class);
5071+
var rangeQueryBuilders = bool.filter().stream().filter(p -> p instanceof SingleValueQuery.Builder).toList();
5072+
assertThat("Expected one range query builder", rangeQueryBuilders.size(), equalTo(1));
5073+
assertThat(((SingleValueQuery.Builder) rangeQueryBuilders.get(0)).field(), equalTo("scalerank"));
5074+
var filterBool = bool.filter().stream().filter(p -> p instanceof BoolQueryBuilder).toList();
5075+
var fb = as(filterBool.get(0), BoolQueryBuilder.class);
5076+
var shapeQueryBuilders = fb.must().stream().filter(p -> p instanceof SpatialRelatesQuery.ShapeQueryBuilder).toList();
5077+
assertShapeQueryRange(shapeQueryBuilders, 10000.0, 500000.0);
5078+
}
5079+
5080+
/**
5081+
* This test shows that if the filter contains a predicate on the same field that is sorted, we cannot push down the sort.
49895082
* <code>
49905083
* ProjectExec[[abbrev{f}#23, name{f}#24, location{f}#27, country{f}#28, city{f}#29, scalerank{f}#25 AS scale]]
49915084
* \_TopNExec[[Order[distance{r}#4,ASC,LAST], Order[scalerank{f}#25,ASC,LAST]],5[INTEGER],0]
@@ -5021,6 +5114,7 @@ public void testPushTopNDistanceAndNonPushableEvalWithCompoundFilterToSource() {
50215114

50225115
var project = as(optimized, ProjectExec.class);
50235116
var topN = as(project.child(), TopNExec.class);
5117+
assertThat(topN.order().size(), is(2));
50245118
var exchange = asRemoteExchange(topN.child());
50255119

50265120
project = as(exchange.child(), ProjectExec.class);
@@ -5059,7 +5153,7 @@ public void testPushTopNDistanceAndNonPushableEvalWithCompoundFilterToSource() {
50595153
}
50605154

50615155
/**
5062-
* This test further shows that with a non-aliasing function, with the same name, less gets pushed down.
5156+
* This test shows that if the filter contains a predicate on the same field that is sorted, we cannot push down the sort.
50635157
* <code>
50645158
* ProjectExec[[abbrev{f}#23, name{f}#24, location{f}#27, country{f}#28, city{f}#29, scale{r}#10]]
50655159
* \_TopNExec[[Order[distance{r}#4,ASC,LAST], Order[scale{r}#10,ASC,LAST]],5[INTEGER],0]
@@ -5096,6 +5190,7 @@ public void testPushTopNDistanceAndNonPushableEvalsWithCompoundFilterToSource()
50965190
""", airports));
50975191
var project = as(optimized, ProjectExec.class);
50985192
var topN = as(project.child(), TopNExec.class);
5193+
assertThat(topN.order().size(), is(2));
50995194
var exchange = asRemoteExchange(topN.child());
51005195

51015196
project = as(exchange.child(), ProjectExec.class);
@@ -5133,7 +5228,8 @@ public void testPushTopNDistanceAndNonPushableEvalsWithCompoundFilterToSource()
51335228
}
51345229

51355230
/**
5136-
* This test shows that with if the top level AND'd predicate contains a non-pushable component, we should not push anything.
5231+
* This test shows that if the top level predicate contains a non-pushable component (e.g. a disjunction),
5232+
* we should not push down the filter.
51375233
* <code>
51385234
* ProjectExec[[abbrev{f}#8612, name{f}#8613, location{f}#8616, country{f}#8617, city{f}#8618, scalerank{f}#8614 AS scale]]
51395235
* \_TopNExec[[Order[distance{r}#8596,ASC,LAST], Order[scalerank{f}#8614,ASC,LAST]],5[INTEGER],0]
@@ -5171,6 +5267,7 @@ public void testPushTopNDistanceWithCompoundFilterToSourceAndDisjunctiveNonPusha
51715267

51725268
var project = as(optimized, ProjectExec.class);
51735269
var topN = as(project.child(), TopNExec.class);
5270+
assertThat(topN.order().size(), is(2));
51745271
var exchange = asRemoteExchange(topN.child());
51755272

51765273
project = as(exchange.child(), ProjectExec.class);

0 commit comments

Comments
 (0)