Skip to content

Commit e9c145b

Browse files
authored
ESQL: Reserve memory TopN (#134235)
Tracks more of the memory that's involved in topn. ## Lucene TopN Lucene doesn't track memory usage for TopN and can use a fair bit of it. Try this query: ``` FROM big_table | SORT a, b, c, d, e | LIMIT 1000000 | STATS MAX(a) ``` We attempt to return all million documents from lucene. If we did this with the compute engine we'd track all of the memory usage. With lucene we have to reserve it. In the case of the query above the sort keys weigh 8 bytes each. 40 bytes total. Plus another 72 for Lucene's `FieldDoc`. And another 40 at least for copying the values to `FieldDoc`. That totals something like 152 bytes apiece. That's 145mb. Worth tracking! ## Esql Engine TopN Esql *does* track memory for topn, but it doesn't track the memory used by the min heap itself. It's just a big array of pointers. But it can get very big!
1 parent 4eeaae3 commit e9c145b

File tree

9 files changed

+313
-70
lines changed

9 files changed

+313
-70
lines changed

docs/changelog/134235.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 134235
2+
summary: Reserve memory for Lucene's TopN
3+
area: ES|QL
4+
type: bug
5+
issues: []

test/external-modules/esql-heap-attack/src/javaRestTest/java/org/elasticsearch/xpack/esql/heap_attack/HeapAttackIT.java

Lines changed: 66 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ public void skipOnAborted() {
9191
* This used to fail, but we've since compacted top n so it actually succeeds now.
9292
*/
9393
public void testSortByManyLongsSuccess() throws IOException {
94-
initManyLongs();
94+
initManyLongs(10);
9595
Map<String, Object> response = sortByManyLongs(500);
9696
ListMatcher columns = matchesList().item(matchesMap().entry("name", "a").entry("type", "long"))
9797
.item(matchesMap().entry("name", "b").entry("type", "long"));
@@ -108,7 +108,7 @@ public void testSortByManyLongsSuccess() throws IOException {
108108
* This used to crash the node with an out of memory, but now it just trips a circuit breaker.
109109
*/
110110
public void testSortByManyLongsTooMuchMemory() throws IOException {
111-
initManyLongs();
111+
initManyLongs(10);
112112
// 5000 is plenty to break on most nodes
113113
assertCircuitBreaks(attempt -> sortByManyLongs(attempt * 5000));
114114
}
@@ -117,7 +117,7 @@ public void testSortByManyLongsTooMuchMemory() throws IOException {
117117
* This should record an async response with a {@link CircuitBreakingException}.
118118
*/
119119
public void testSortByManyLongsTooMuchMemoryAsync() throws IOException {
120-
initManyLongs();
120+
initManyLongs(10);
121121
Request request = new Request("POST", "/_query/async");
122122
request.addParameter("error_trace", "");
123123
request.setJsonEntity(makeSortByManyLongs(5000).toString().replace("\n", "\\n"));
@@ -194,6 +194,29 @@ public void testSortByManyLongsTooMuchMemoryAsync() throws IOException {
194194
);
195195
}
196196

197+
public void testSortByManyLongsGiantTopN() throws IOException {
198+
initManyLongs(10);
199+
assertMap(
200+
sortBySomeLongsLimit(100000),
201+
matchesMap().entry("took", greaterThan(0))
202+
.entry("is_partial", false)
203+
.entry("columns", List.of(Map.of("name", "MAX(a)", "type", "long")))
204+
.entry("values", List.of(List.of(9)))
205+
.entry("documents_found", greaterThan(0))
206+
.entry("values_loaded", greaterThan(0))
207+
);
208+
}
209+
210+
public void testSortByManyLongsGiantTopNTooMuchMemory() throws IOException {
211+
initManyLongs(20);
212+
assertCircuitBreaks(attempt -> sortBySomeLongsLimit(attempt * 500000));
213+
}
214+
215+
public void testStupidTopN() throws IOException {
216+
initManyLongs(1); // Doesn't actually matter how much data there is.
217+
assertCircuitBreaks(attempt -> sortBySomeLongsLimit(2147483630));
218+
}
219+
197220
private static final int MAX_ATTEMPTS = 5;
198221

199222
interface TryCircuitBreaking {
@@ -252,11 +275,25 @@ private StringBuilder makeSortByManyLongs(int count) {
252275
return query;
253276
}
254277

278+
private Map<String, Object> sortBySomeLongsLimit(int count) throws IOException {
279+
logger.info("sorting by 5 longs, keeping {}", count);
280+
return responseAsMap(query(makeSortBySomeLongsLimit(count), null));
281+
}
282+
283+
private String makeSortBySomeLongsLimit(int count) {
284+
StringBuilder query = new StringBuilder("{\"query\": \"FROM manylongs\n");
285+
query.append("| SORT a, b, c, d, e\n");
286+
query.append("| LIMIT ").append(count).append("\n");
287+
query.append("| STATS MAX(a)\n");
288+
query.append("\"}");
289+
return query.toString();
290+
}
291+
255292
/**
256293
* This groups on about 200 columns which is a lot but has never caused us trouble.
257294
*/
258295
public void testGroupOnSomeLongs() throws IOException {
259-
initManyLongs();
296+
initManyLongs(10);
260297
Response resp = groupOnManyLongs(200);
261298
Map<String, Object> map = responseAsMap(resp);
262299
ListMatcher columns = matchesList().item(matchesMap().entry("name", "MAX(a)").entry("type", "long"));
@@ -268,7 +305,7 @@ public void testGroupOnSomeLongs() throws IOException {
268305
* This groups on 5000 columns which used to throw a {@link StackOverflowError}.
269306
*/
270307
public void testGroupOnManyLongs() throws IOException {
271-
initManyLongs();
308+
initManyLongs(10);
272309
Response resp = groupOnManyLongs(5000);
273310
Map<String, Object> map = responseAsMap(resp);
274311
ListMatcher columns = matchesList().item(matchesMap().entry("name", "MAX(a)").entry("type", "long"));
@@ -336,15 +373,15 @@ private Response concat(int evals) throws IOException {
336373
*/
337374
public void testManyConcat() throws IOException {
338375
int strings = 300;
339-
initManyLongs();
376+
initManyLongs(10);
340377
assertManyStrings(manyConcat("FROM manylongs", strings), strings);
341378
}
342379

343380
/**
344381
* Hits a circuit breaker by building many moderately long strings.
345382
*/
346383
public void testHugeManyConcat() throws IOException {
347-
initManyLongs();
384+
initManyLongs(10);
348385
// 2000 is plenty to break on most nodes
349386
assertCircuitBreaks(attempt -> manyConcat("FROM manylongs", attempt * 2000));
350387
}
@@ -415,15 +452,15 @@ private Map<String, Object> manyConcat(String init, int strings) throws IOExcept
415452
*/
416453
public void testManyRepeat() throws IOException {
417454
int strings = 30;
418-
initManyLongs();
455+
initManyLongs(10);
419456
assertManyStrings(manyRepeat("FROM manylongs", strings), 30);
420457
}
421458

422459
/**
423460
* Hits a circuit breaker by building many moderately long strings.
424461
*/
425462
public void testHugeManyRepeat() throws IOException {
426-
initManyLongs();
463+
initManyLongs(10);
427464
// 75 is plenty to break on most nodes
428465
assertCircuitBreaks(attempt -> manyRepeat("FROM manylongs", attempt * 75));
429466
}
@@ -481,7 +518,7 @@ private void assertManyStrings(Map<String, Object> resp, int strings) throws IOE
481518
}
482519

483520
public void testManyEval() throws IOException {
484-
initManyLongs();
521+
initManyLongs(10);
485522
Map<String, Object> response = manyEval(1);
486523
ListMatcher columns = matchesList();
487524
columns = columns.item(matchesMap().entry("name", "a").entry("type", "long"));
@@ -496,7 +533,7 @@ public void testManyEval() throws IOException {
496533
}
497534

498535
public void testTooManyEval() throws IOException {
499-
initManyLongs();
536+
initManyLongs(10);
500537
// 490 is plenty to fail on most nodes
501538
assertCircuitBreaks(attempt -> manyEval(attempt * 490));
502539
}
@@ -855,24 +892,34 @@ private Map<String, Object> enrichExplosion(int sensorDataCount, int lookupEntri
855892
}
856893
}
857894

858-
private void initManyLongs() throws IOException {
895+
private void initManyLongs(int countPerLong) throws IOException {
859896
logger.info("loading many documents with longs");
860897
StringBuilder bulk = new StringBuilder();
861-
for (int a = 0; a < 10; a++) {
862-
for (int b = 0; b < 10; b++) {
863-
for (int c = 0; c < 10; c++) {
864-
for (int d = 0; d < 10; d++) {
865-
for (int e = 0; e < 10; e++) {
898+
int flush = 0;
899+
for (int a = 0; a < countPerLong; a++) {
900+
for (int b = 0; b < countPerLong; b++) {
901+
for (int c = 0; c < countPerLong; c++) {
902+
for (int d = 0; d < countPerLong; d++) {
903+
for (int e = 0; e < countPerLong; e++) {
866904
bulk.append(String.format(Locale.ROOT, """
867905
{"create":{}}
868906
{"a":%d,"b":%d,"c":%d,"d":%d,"e":%d}
869907
""", a, b, c, d, e));
908+
flush++;
909+
if (flush % 10_000 == 0) {
910+
bulk("manylongs", bulk.toString());
911+
bulk.setLength(0);
912+
logger.info(
913+
"flushing {}/{} to manylongs",
914+
flush,
915+
countPerLong * countPerLong * countPerLong * countPerLong * countPerLong
916+
);
917+
918+
}
870919
}
871920
}
872921
}
873922
}
874-
bulk("manylongs", bulk.toString());
875-
bulk.setLength(0);
876923
}
877924
initIndex("manylongs", bulk.toString());
878925
}

0 commit comments

Comments
 (0)