Skip to content

Commit cd93409

Browse files
Support partial sort fields in TopN pushdown (#116043) (#129199) (#129203)
* Support partial sort fields in TopN pushdown * Update docs/changelog/116043.yaml * Update docs/changelog/116043.yaml * Refine physical planner tests
1 parent fa62e94 commit cd93409

File tree

3 files changed

+108
-6
lines changed

3 files changed

+108
-6
lines changed

docs/changelog/116043.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 116043
2+
summary: Support partial sort fields in TopN pushdown
3+
area: ES|QL
4+
type: enhancement
5+
issues:
6+
- 114515

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSource.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,7 @@ && canPushDownOrders(topNExec.order(), lucenePushdownPredicates)) {
189189
break;
190190
}
191191
}
192-
// TODO: We can push down partial sorts where `pushableSorts.size() < orders.size()`, but that should involve benchmarks
193-
if (pushableSorts.size() > 0 && pushableSorts.size() == orders.size()) {
192+
if (pushableSorts.isEmpty() == false) {
194193
return new PushableCompoundExec(evalExec, queryExec, pushableSorts);
195194
}
196195
}

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java

Lines changed: 101 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5853,8 +5853,101 @@ public void testPushTopNDistanceWithCompoundFilterToSource() {
58535853
}
58545854

58555855
/**
5856-
* This test shows that with an additional EVAL used in the filter, we can no longer push down the SORT distance.
5857-
* TODO: This could be optimized in future work. Consider moving much of EnableSpatialDistancePushdown into logical planning.
5856+
* Tests that multiple sorts, including distance and a field, are pushed down to the source.
5857+
* <code>
5858+
* ProjectExec[[abbrev{f}#25, name{f}#26, location{f}#29, country{f}#30, city{f}#31, scalerank{f}#27, scale{r}#7]]
5859+
* \_TopNExec[[
5860+
* Order[distance{r}#4,ASC,LAST],
5861+
* Order[scalerank{f}#27,ASC,LAST],
5862+
* Order[scale{r}#7,DESC,FIRST],
5863+
* Order[loc{r}#10,DESC,FIRST]
5864+
* ],5[INTEGER],0]
5865+
* \_ExchangeExec[[abbrev{f}#25, name{f}#26, location{f}#29, country{f}#30, city{f}#31, scalerank{f}#27, scale{r}#7,
5866+
* distance{r}#4, loc{r}#10],false]
5867+
* \_ProjectExec[[abbrev{f}#25, name{f}#26, location{f}#29, country{f}#30, city{f}#31, scalerank{f}#27, scale{r}#7,
5868+
* distance{r}#4, loc{r}#10]]
5869+
* \_FieldExtractExec[abbrev{f}#25, name{f}#26, country{f}#30, city{f}#31][]
5870+
* \_EvalExec[[
5871+
* STDISTANCE(location{f}#29,[1 1 0 0 0 e1 7a 14 ae 47 21 29 40 a0 1a 2f dd 24 d6 4b 40][GEO_POINT]) AS distance,
5872+
* 10[INTEGER] - scalerank{f}#27 AS scale, TOSTRING(location{f}#29) AS loc
5873+
* ]]
5874+
* \_FieldExtractExec[location{f}#29, scalerank{f}#27][]
5875+
* \_EsQueryExec[airports], indexMode[standard], query[{
5876+
* "bool":{
5877+
* "filter":[
5878+
* {"esql_single_value":{"field":"scalerank","next":{...},"source":"scalerank &lt; 6@3:31"}},
5879+
* {"bool":{
5880+
* "must":[
5881+
* {"geo_shape":{"location":{"relation":"INTERSECTS","shape":{...}}}},
5882+
* {"geo_shape":{"location":{"relation":"DISJOINT","shape":{...}}}}
5883+
* ],"boost":1.0}}],"boost":1.0}}][_doc{f}#44], limit[5], sort[[
5884+
* GeoDistanceSort[field=location{f}#29, direction=ASC, lat=55.673, lon=12.565],
5885+
* FieldSort[field=scalerank{f}#27, direction=ASC, nulls=LAST]
5886+
* ]] estimatedRowSize[303]
5887+
* </code>
5888+
*/
5889+
public void testPushTopNDistanceAndPushableFieldWithCompoundFilterToSource() {
5890+
var optimized = optimizedPlan(physicalPlan("""
5891+
FROM airports
5892+
| EVAL distance = ST_DISTANCE(location, TO_GEOPOINT("POINT(12.565 55.673)")), scale = 10 - scalerank, loc = location::string
5893+
| WHERE distance < 500000 AND scalerank < 6 AND distance > 10000
5894+
| SORT distance ASC, scalerank ASC, scale DESC, loc DESC
5895+
| LIMIT 5
5896+
| KEEP abbrev, name, location, country, city, scalerank, scale
5897+
""", airports));
5898+
5899+
var project = as(optimized, ProjectExec.class);
5900+
var topN = as(project.child(), TopNExec.class);
5901+
assertThat(topN.order().size(), is(4));
5902+
var exchange = asRemoteExchange(topN.child());
5903+
5904+
project = as(exchange.child(), ProjectExec.class);
5905+
assertThat(
5906+
names(project.projections()),
5907+
contains("abbrev", "name", "location", "country", "city", "scalerank", "scale", "distance", "loc")
5908+
);
5909+
var extract = as(project.child(), FieldExtractExec.class);
5910+
assertThat(names(extract.attributesToExtract()), contains("abbrev", "name", "country", "city"));
5911+
var evalExec = as(extract.child(), EvalExec.class);
5912+
var alias = as(evalExec.fields().get(0), Alias.class);
5913+
assertThat(alias.name(), is("distance"));
5914+
var stDistance = as(alias.child(), StDistance.class);
5915+
assertThat(stDistance.left().toString(), startsWith("location"));
5916+
extract = as(evalExec.child(), FieldExtractExec.class);
5917+
assertThat(names(extract.attributesToExtract()), contains("location", "scalerank"));
5918+
var source = source(extract.child());
5919+
5920+
// Assert that the TopN(distance) is pushed down as geo-sort(location)
5921+
assertThat(source.limit(), is(topN.limit()));
5922+
Set<String> orderSet = orderAsSet(topN.order().subList(0, 2));
5923+
Set<String> sortsSet = sortsAsSet(source.sorts(), Map.of("location", "distance"));
5924+
assertThat(orderSet, is(sortsSet));
5925+
5926+
// Fine-grained checks on the pushed down sort
5927+
assertThat(source.limit(), is(l(5)));
5928+
assertThat(source.sorts().size(), is(2));
5929+
EsQueryExec.Sort sort = source.sorts().get(0);
5930+
assertThat(sort.direction(), is(Order.OrderDirection.ASC));
5931+
assertThat(name(sort.field()), is("location"));
5932+
assertThat(sort.sortBuilder(), isA(GeoDistanceSortBuilder.class));
5933+
sort = source.sorts().get(1);
5934+
assertThat(sort.direction(), is(Order.OrderDirection.ASC));
5935+
assertThat(name(sort.field()), is("scalerank"));
5936+
assertThat(sort.sortBuilder(), isA(FieldSortBuilder.class));
5937+
5938+
// Fine-grained checks on the pushed down query
5939+
var bool = as(source.query(), BoolQueryBuilder.class);
5940+
var rangeQueryBuilders = bool.filter().stream().filter(p -> p instanceof SingleValueQuery.Builder).toList();
5941+
assertThat("Expected one range query builder", rangeQueryBuilders.size(), equalTo(1));
5942+
assertThat(((SingleValueQuery.Builder) rangeQueryBuilders.get(0)).field(), equalTo("scalerank"));
5943+
var filterBool = bool.filter().stream().filter(p -> p instanceof BoolQueryBuilder).toList();
5944+
var fb = as(filterBool.get(0), BoolQueryBuilder.class);
5945+
var shapeQueryBuilders = fb.must().stream().filter(p -> p instanceof SpatialRelatesQuery.ShapeQueryBuilder).toList();
5946+
assertShapeQueryRange(shapeQueryBuilders, 10000.0, 500000.0);
5947+
}
5948+
5949+
/**
5950+
* This test shows that if the filter contains a predicate on the same field that is sorted, we cannot push down the sort.
58585951
* <code>
58595952
* ProjectExec[[abbrev{f}#23, name{f}#24, location{f}#27, country{f}#28, city{f}#29, scalerank{f}#25 AS scale]]
58605953
* \_TopNExec[[Order[distance{r}#4,ASC,LAST], Order[scalerank{f}#25,ASC,LAST]],5[INTEGER],0]
@@ -5890,6 +5983,7 @@ public void testPushTopNDistanceAndNonPushableEvalWithCompoundFilterToSource() {
58905983

58915984
var project = as(optimized, ProjectExec.class);
58925985
var topN = as(project.child(), TopNExec.class);
5986+
assertThat(topN.order().size(), is(2));
58935987
var exchange = asRemoteExchange(topN.child());
58945988

58955989
project = as(exchange.child(), ProjectExec.class);
@@ -5928,7 +6022,7 @@ public void testPushTopNDistanceAndNonPushableEvalWithCompoundFilterToSource() {
59286022
}
59296023

59306024
/**
5931-
* This test further shows that with a non-aliasing function, with the same name, less gets pushed down.
6025+
* This test shows that if the filter contains a predicate on the same field that is sorted, we cannot push down the sort.
59326026
* <code>
59336027
* ProjectExec[[abbrev{f}#23, name{f}#24, location{f}#27, country{f}#28, city{f}#29, scale{r}#10]]
59346028
* \_TopNExec[[Order[distance{r}#4,ASC,LAST], Order[scale{r}#10,ASC,LAST]],5[INTEGER],0]
@@ -5965,6 +6059,7 @@ public void testPushTopNDistanceAndNonPushableEvalsWithCompoundFilterToSource()
59656059
""", airports));
59666060
var project = as(optimized, ProjectExec.class);
59676061
var topN = as(project.child(), TopNExec.class);
6062+
assertThat(topN.order().size(), is(2));
59686063
var exchange = asRemoteExchange(topN.child());
59696064

59706065
project = as(exchange.child(), ProjectExec.class);
@@ -6002,7 +6097,8 @@ public void testPushTopNDistanceAndNonPushableEvalsWithCompoundFilterToSource()
60026097
}
60036098

60046099
/**
6005-
* This test shows that with if the top level AND'd predicate contains a non-pushable component, we should not push anything.
6100+
* This test shows that with if the top level predicate contains a non-pushable component (eg. disjunction),
6101+
* we should not push down the filter.
60066102
* <code>
60076103
* ProjectExec[[abbrev{f}#8612, name{f}#8613, location{f}#8616, country{f}#8617, city{f}#8618, scalerank{f}#8614 AS scale]]
60086104
* \_TopNExec[[Order[distance{r}#8596,ASC,LAST], Order[scalerank{f}#8614,ASC,LAST]],5[INTEGER],0]
@@ -6040,6 +6136,7 @@ public void testPushTopNDistanceWithCompoundFilterToSourceAndDisjunctiveNonPusha
60406136

60416137
var project = as(optimized, ProjectExec.class);
60426138
var topN = as(project.child(), TopNExec.class);
6139+
assertThat(topN.order().size(), is(2));
60436140
var exchange = asRemoteExchange(topN.child());
60446141

60456142
project = as(exchange.child(), ProjectExec.class);

0 commit comments

Comments
 (0)