Skip to content

Commit 595251d

Browse files
Support partial sort fields in TopN pushdown (#116043) (#129199)
* Support partial sort fields in TopN pushdown * Update docs/changelog/116043.yaml * Update docs/changelog/116043.yaml * Refine physical planner tests
1 parent 3e05896 commit 595251d

File tree

3 files changed

+108
-6
lines changed

3 files changed

+108
-6
lines changed

docs/changelog/116043.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 116043
2+
summary: Support partial sort fields in TopN pushdown
3+
area: ES|QL
4+
type: enhancement
5+
issues:
6+
- 114515

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSource.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,7 @@ && canPushDownOrders(topNExec.order(), lucenePushdownPredicates)) {
189189
break;
190190
}
191191
}
192-
// TODO: We can push down partial sorts where `pushableSorts.size() < orders.size()`, but that should involve benchmarks
193-
if (pushableSorts.size() > 0 && pushableSorts.size() == orders.size()) {
192+
if (pushableSorts.isEmpty() == false) {
194193
return new PushableCompoundExec(evalExec, queryExec, pushableSorts);
195194
}
196195
}

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java

Lines changed: 101 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6035,8 +6035,101 @@ public void testPushTopNDistanceWithCompoundFilterToSource() {
60356035
}
60366036

60376037
/**
6038-
* This test shows that with an additional EVAL used in the filter, we can no longer push down the SORT distance.
6039-
* TODO: This could be optimized in future work. Consider moving much of EnableSpatialDistancePushdown into logical planning.
6038+
* Tests that multiple sorts, including distance and a field, are pushed down to the source.
6039+
* <code>
6040+
* ProjectExec[[abbrev{f}#25, name{f}#26, location{f}#29, country{f}#30, city{f}#31, scalerank{f}#27, scale{r}#7]]
6041+
* \_TopNExec[[
6042+
* Order[distance{r}#4,ASC,LAST],
6043+
* Order[scalerank{f}#27,ASC,LAST],
6044+
* Order[scale{r}#7,DESC,FIRST],
6045+
* Order[loc{r}#10,DESC,FIRST]
6046+
* ],5[INTEGER],0]
6047+
* \_ExchangeExec[[abbrev{f}#25, name{f}#26, location{f}#29, country{f}#30, city{f}#31, scalerank{f}#27, scale{r}#7,
6048+
* distance{r}#4, loc{r}#10],false]
6049+
* \_ProjectExec[[abbrev{f}#25, name{f}#26, location{f}#29, country{f}#30, city{f}#31, scalerank{f}#27, scale{r}#7,
6050+
* distance{r}#4, loc{r}#10]]
6051+
* \_FieldExtractExec[abbrev{f}#25, name{f}#26, country{f}#30, city{f}#31][]
6052+
* \_EvalExec[[
6053+
* STDISTANCE(location{f}#29,[1 1 0 0 0 e1 7a 14 ae 47 21 29 40 a0 1a 2f dd 24 d6 4b 40][GEO_POINT]) AS distance,
6054+
* 10[INTEGER] - scalerank{f}#27 AS scale, TOSTRING(location{f}#29) AS loc
6055+
* ]]
6056+
* \_FieldExtractExec[location{f}#29, scalerank{f}#27][]
6057+
* \_EsQueryExec[airports], indexMode[standard], query[{
6058+
* "bool":{
6059+
* "filter":[
6060+
* {"esql_single_value":{"field":"scalerank","next":{...},"source":"scalerank &lt; 6@3:31"}},
6061+
* {"bool":{
6062+
* "must":[
6063+
* {"geo_shape":{"location":{"relation":"INTERSECTS","shape":{...}}}},
6064+
* {"geo_shape":{"location":{"relation":"DISJOINT","shape":{...}}}}
6065+
* ],"boost":1.0}}],"boost":1.0}}][_doc{f}#44], limit[5], sort[[
6066+
* GeoDistanceSort[field=location{f}#29, direction=ASC, lat=55.673, lon=12.565],
6067+
* FieldSort[field=scalerank{f}#27, direction=ASC, nulls=LAST]
6068+
* ]] estimatedRowSize[303]
6069+
* </code>
6070+
*/
6071+
public void testPushTopNDistanceAndPushableFieldWithCompoundFilterToSource() {
6072+
var optimized = optimizedPlan(physicalPlan("""
6073+
FROM airports
6074+
| EVAL distance = ST_DISTANCE(location, TO_GEOPOINT("POINT(12.565 55.673)")), scale = 10 - scalerank, loc = location::string
6075+
| WHERE distance < 500000 AND scalerank < 6 AND distance > 10000
6076+
| SORT distance ASC, scalerank ASC, scale DESC, loc DESC
6077+
| LIMIT 5
6078+
| KEEP abbrev, name, location, country, city, scalerank, scale
6079+
""", airports));
6080+
6081+
var project = as(optimized, ProjectExec.class);
6082+
var topN = as(project.child(), TopNExec.class);
6083+
assertThat(topN.order().size(), is(4));
6084+
var exchange = asRemoteExchange(topN.child());
6085+
6086+
project = as(exchange.child(), ProjectExec.class);
6087+
assertThat(
6088+
names(project.projections()),
6089+
contains("abbrev", "name", "location", "country", "city", "scalerank", "scale", "distance", "loc")
6090+
);
6091+
var extract = as(project.child(), FieldExtractExec.class);
6092+
assertThat(names(extract.attributesToExtract()), contains("abbrev", "name", "country", "city"));
6093+
var evalExec = as(extract.child(), EvalExec.class);
6094+
var alias = as(evalExec.fields().get(0), Alias.class);
6095+
assertThat(alias.name(), is("distance"));
6096+
var stDistance = as(alias.child(), StDistance.class);
6097+
assertThat(stDistance.left().toString(), startsWith("location"));
6098+
extract = as(evalExec.child(), FieldExtractExec.class);
6099+
assertThat(names(extract.attributesToExtract()), contains("location", "scalerank"));
6100+
var source = source(extract.child());
6101+
6102+
// Assert that the TopN(distance) is pushed down as geo-sort(location)
6103+
assertThat(source.limit(), is(topN.limit()));
6104+
Set<String> orderSet = orderAsSet(topN.order().subList(0, 2));
6105+
Set<String> sortsSet = sortsAsSet(source.sorts(), Map.of("location", "distance"));
6106+
assertThat(orderSet, is(sortsSet));
6107+
6108+
// Fine-grained checks on the pushed down sort
6109+
assertThat(source.limit(), is(l(5)));
6110+
assertThat(source.sorts().size(), is(2));
6111+
EsQueryExec.Sort sort = source.sorts().get(0);
6112+
assertThat(sort.direction(), is(Order.OrderDirection.ASC));
6113+
assertThat(name(sort.field()), is("location"));
6114+
assertThat(sort.sortBuilder(), isA(GeoDistanceSortBuilder.class));
6115+
sort = source.sorts().get(1);
6116+
assertThat(sort.direction(), is(Order.OrderDirection.ASC));
6117+
assertThat(name(sort.field()), is("scalerank"));
6118+
assertThat(sort.sortBuilder(), isA(FieldSortBuilder.class));
6119+
6120+
// Fine-grained checks on the pushed down query
6121+
var bool = as(source.query(), BoolQueryBuilder.class);
6122+
var rangeQueryBuilders = bool.filter().stream().filter(p -> p instanceof SingleValueQuery.Builder).toList();
6123+
assertThat("Expected one range query builder", rangeQueryBuilders.size(), equalTo(1));
6124+
assertThat(((SingleValueQuery.Builder) rangeQueryBuilders.get(0)).field(), equalTo("scalerank"));
6125+
var filterBool = bool.filter().stream().filter(p -> p instanceof BoolQueryBuilder).toList();
6126+
var fb = as(filterBool.get(0), BoolQueryBuilder.class);
6127+
var shapeQueryBuilders = fb.must().stream().filter(p -> p instanceof SpatialRelatesQuery.ShapeQueryBuilder).toList();
6128+
assertShapeQueryRange(shapeQueryBuilders, 10000.0, 500000.0);
6129+
}
6130+
6131+
/**
6132+
* This test shows that if the filter contains a predicate on the same field that is sorted, we cannot push down the sort.
60406133
* <code>
60416134
* ProjectExec[[abbrev{f}#23, name{f}#24, location{f}#27, country{f}#28, city{f}#29, scalerank{f}#25 AS scale]]
60426135
* \_TopNExec[[Order[distance{r}#4,ASC,LAST], Order[scalerank{f}#25,ASC,LAST]],5[INTEGER],0]
@@ -6072,6 +6165,7 @@ public void testPushTopNDistanceAndNonPushableEvalWithCompoundFilterToSource() {
60726165

60736166
var project = as(optimized, ProjectExec.class);
60746167
var topN = as(project.child(), TopNExec.class);
6168+
assertThat(topN.order().size(), is(2));
60756169
var exchange = asRemoteExchange(topN.child());
60766170

60776171
project = as(exchange.child(), ProjectExec.class);
@@ -6110,7 +6204,7 @@ public void testPushTopNDistanceAndNonPushableEvalWithCompoundFilterToSource() {
61106204
}
61116205

61126206
/**
6113-
* This test further shows that with a non-aliasing function, with the same name, less gets pushed down.
6207+
* This test shows that if the filter contains a predicate on the same field that is sorted, we cannot push down the sort.
61146208
* <code>
61156209
* ProjectExec[[abbrev{f}#23, name{f}#24, location{f}#27, country{f}#28, city{f}#29, scale{r}#10]]
61166210
* \_TopNExec[[Order[distance{r}#4,ASC,LAST], Order[scale{r}#10,ASC,LAST]],5[INTEGER],0]
@@ -6147,6 +6241,7 @@ public void testPushTopNDistanceAndNonPushableEvalsWithCompoundFilterToSource()
61476241
""", airports));
61486242
var project = as(optimized, ProjectExec.class);
61496243
var topN = as(project.child(), TopNExec.class);
6244+
assertThat(topN.order().size(), is(2));
61506245
var exchange = asRemoteExchange(topN.child());
61516246

61526247
project = as(exchange.child(), ProjectExec.class);
@@ -6184,7 +6279,8 @@ public void testPushTopNDistanceAndNonPushableEvalsWithCompoundFilterToSource()
61846279
}
61856280

61866281
/**
6187-
* This test shows that with if the top level AND'd predicate contains a non-pushable component, we should not push anything.
6282+
* This test shows that with if the top level predicate contains a non-pushable component (eg. disjunction),
6283+
* we should not push down the filter.
61886284
* <code>
61896285
* ProjectExec[[abbrev{f}#8612, name{f}#8613, location{f}#8616, country{f}#8617, city{f}#8618, scalerank{f}#8614 AS scale]]
61906286
* \_TopNExec[[Order[distance{r}#8596,ASC,LAST], Order[scalerank{f}#8614,ASC,LAST]],5[INTEGER],0]
@@ -6222,6 +6318,7 @@ public void testPushTopNDistanceWithCompoundFilterToSourceAndDisjunctiveNonPusha
62226318

62236319
var project = as(optimized, ProjectExec.class);
62246320
var topN = as(project.child(), TopNExec.class);
6321+
assertThat(topN.order().size(), is(2));
62256322
var exchange = asRemoteExchange(topN.child());
62266323

62276324
project = as(exchange.child(), ProjectExec.class);

0 commit comments

Comments
 (0)