Skip to content

Commit 83d8453

Browse files
committed
ESQL: Reserve memory TopN (elastic#134235)
Tracks more of the memory that's involved in TopN. Lucene doesn't track memory usage for TopN and can use a fair bit of it. Try this query: ``` FROM big_table | SORT a, b, c, d, e | LIMIT 1000000 | STATS MAX(a) ``` We attempt to return all million documents from Lucene. If we did this with the compute engine we'd track all of the memory usage. With Lucene we have to reserve it. In the case of the query above the sort keys weigh 8 bytes each. 40 bytes total. Plus another 72 for Lucene's `FieldDoc`. And another 40 at least for copying the values to `FieldDoc`. That totals something like 152 bytes apiece. That's 145mb. Worth tracking! ## Esql Engine TopN Esql *does* track memory for TopN, but it doesn't track the memory used by the min heap itself. It's just a big array of pointers. But it can get very big!
1 parent ffbb769 commit 83d8453

File tree

9 files changed

+320
-68
lines changed

9 files changed

+320
-68
lines changed

docs/changelog/134235.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 134235
2+
summary: Reserve memory for Lucene's TopN
3+
area: ES|QL
4+
type: bug
5+
issues: []

test/external-modules/esql-heap-attack/src/javaRestTest/java/org/elasticsearch/xpack/esql/heap_attack/HeapAttackIT.java

Lines changed: 66 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ public void skipOnAborted() {
9090
* This used to fail, but we've since compacted top n so it actually succeeds now.
9191
*/
9292
public void testSortByManyLongsSuccess() throws IOException {
93-
initManyLongs();
93+
initManyLongs(10);
9494
Map<String, Object> response = sortByManyLongs(500);
9595
ListMatcher columns = matchesList().item(matchesMap().entry("name", "a").entry("type", "long"))
9696
.item(matchesMap().entry("name", "b").entry("type", "long"));
@@ -107,7 +107,7 @@ public void testSortByManyLongsSuccess() throws IOException {
107107
* This used to crash the node with an out of memory, but now it just trips a circuit breaker.
108108
*/
109109
public void testSortByManyLongsTooMuchMemory() throws IOException {
110-
initManyLongs();
110+
initManyLongs(10);
111111
// 5000 is plenty to break on most nodes
112112
assertCircuitBreaks(attempt -> sortByManyLongs(attempt * 5000));
113113
}
@@ -116,7 +116,7 @@ public void testSortByManyLongsTooMuchMemory() throws IOException {
116116
* This should record an async response with a {@link CircuitBreakingException}.
117117
*/
118118
public void testSortByManyLongsTooMuchMemoryAsync() throws IOException {
119-
initManyLongs();
119+
initManyLongs(10);
120120
Request request = new Request("POST", "/_query/async");
121121
request.addParameter("error_trace", "");
122122
request.setJsonEntity(makeSortByManyLongs(5000).toString().replace("\n", "\\n"));
@@ -193,6 +193,29 @@ public void testSortByManyLongsTooMuchMemoryAsync() throws IOException {
193193
);
194194
}
195195

196+
public void testSortByManyLongsGiantTopN() throws IOException {
197+
initManyLongs(10);
198+
assertMap(
199+
sortBySomeLongsLimit(100000),
200+
matchesMap().entry("took", greaterThan(0))
201+
.entry("is_partial", false)
202+
.entry("columns", List.of(Map.of("name", "MAX(a)", "type", "long")))
203+
.entry("values", List.of(List.of(9)))
204+
.entry("documents_found", greaterThan(0))
205+
.entry("values_loaded", greaterThan(0))
206+
);
207+
}
208+
209+
public void testSortByManyLongsGiantTopNTooMuchMemory() throws IOException {
210+
initManyLongs(20);
211+
assertCircuitBreaks(attempt -> sortBySomeLongsLimit(attempt * 500000));
212+
}
213+
214+
public void testStupidTopN() throws IOException {
215+
initManyLongs(1); // Doesn't actually matter how much data there is.
216+
assertCircuitBreaks(attempt -> sortBySomeLongsLimit(2147483630));
217+
}
218+
196219
private static final int MAX_ATTEMPTS = 5;
197220

198221
interface TryCircuitBreaking {
@@ -251,11 +274,25 @@ private StringBuilder makeSortByManyLongs(int count) {
251274
return query;
252275
}
253276

277+
private Map<String, Object> sortBySomeLongsLimit(int count) throws IOException {
278+
logger.info("sorting by 5 longs, keeping {}", count);
279+
return responseAsMap(query(makeSortBySomeLongsLimit(count), null));
280+
}
281+
282+
private String makeSortBySomeLongsLimit(int count) {
283+
StringBuilder query = new StringBuilder("{\"query\": \"FROM manylongs\n");
284+
query.append("| SORT a, b, c, d, e\n");
285+
query.append("| LIMIT ").append(count).append("\n");
286+
query.append("| STATS MAX(a)\n");
287+
query.append("\"}");
288+
return query.toString();
289+
}
290+
254291
/**
255292
* This groups on about 200 columns which is a lot but has never caused us trouble.
256293
*/
257294
public void testGroupOnSomeLongs() throws IOException {
258-
initManyLongs();
295+
initManyLongs(10);
259296
Response resp = groupOnManyLongs(200);
260297
Map<String, Object> map = responseAsMap(resp);
261298
ListMatcher columns = matchesList().item(matchesMap().entry("name", "MAX(a)").entry("type", "long"));
@@ -267,7 +304,7 @@ public void testGroupOnSomeLongs() throws IOException {
267304
* This groups on 5000 columns which used to throw a {@link StackOverflowError}.
268305
*/
269306
public void testGroupOnManyLongs() throws IOException {
270-
initManyLongs();
307+
initManyLongs(10);
271308
Response resp = groupOnManyLongs(5000);
272309
Map<String, Object> map = responseAsMap(resp);
273310
ListMatcher columns = matchesList().item(matchesMap().entry("name", "MAX(a)").entry("type", "long"));
@@ -335,15 +372,15 @@ private Response concat(int evals) throws IOException {
335372
*/
336373
public void testManyConcat() throws IOException {
337374
int strings = 300;
338-
initManyLongs();
375+
initManyLongs(10);
339376
assertManyStrings(manyConcat("FROM manylongs", strings), strings);
340377
}
341378

342379
/**
343380
* Hits a circuit breaker by building many moderately long strings.
344381
*/
345382
public void testHugeManyConcat() throws IOException {
346-
initManyLongs();
383+
initManyLongs(10);
347384
// 2000 is plenty to break on most nodes
348385
assertCircuitBreaks(attempt -> manyConcat("FROM manylongs", attempt * 2000));
349386
}
@@ -414,15 +451,15 @@ private Map<String, Object> manyConcat(String init, int strings) throws IOExcept
414451
*/
415452
public void testManyRepeat() throws IOException {
416453
int strings = 30;
417-
initManyLongs();
454+
initManyLongs(10);
418455
assertManyStrings(manyRepeat("FROM manylongs", strings), 30);
419456
}
420457

421458
/**
422459
* Hits a circuit breaker by building many moderately long strings.
423460
*/
424461
public void testHugeManyRepeat() throws IOException {
425-
initManyLongs();
462+
initManyLongs(10);
426463
// 75 is plenty to break on most nodes
427464
assertCircuitBreaks(attempt -> manyRepeat("FROM manylongs", attempt * 75));
428465
}
@@ -480,7 +517,7 @@ private void assertManyStrings(Map<String, Object> resp, int strings) throws IOE
480517
}
481518

482519
public void testManyEval() throws IOException {
483-
initManyLongs();
520+
initManyLongs(10);
484521
Map<String, Object> response = manyEval(1);
485522
ListMatcher columns = matchesList();
486523
columns = columns.item(matchesMap().entry("name", "a").entry("type", "long"));
@@ -495,7 +532,7 @@ public void testManyEval() throws IOException {
495532
}
496533

497534
public void testTooManyEval() throws IOException {
498-
initManyLongs();
535+
initManyLongs(10);
499536
// 490 is plenty to fail on most nodes
500537
assertCircuitBreaks(attempt -> manyEval(attempt * 490));
501538
}
@@ -810,24 +847,34 @@ private Map<String, Object> enrichExplosion(int sensorDataCount, int lookupEntri
810847
}
811848
}
812849

813-
private void initManyLongs() throws IOException {
850+
private void initManyLongs(int countPerLong) throws IOException {
814851
logger.info("loading many documents with longs");
815852
StringBuilder bulk = new StringBuilder();
816-
for (int a = 0; a < 10; a++) {
817-
for (int b = 0; b < 10; b++) {
818-
for (int c = 0; c < 10; c++) {
819-
for (int d = 0; d < 10; d++) {
820-
for (int e = 0; e < 10; e++) {
853+
int flush = 0;
854+
for (int a = 0; a < countPerLong; a++) {
855+
for (int b = 0; b < countPerLong; b++) {
856+
for (int c = 0; c < countPerLong; c++) {
857+
for (int d = 0; d < countPerLong; d++) {
858+
for (int e = 0; e < countPerLong; e++) {
821859
bulk.append(String.format(Locale.ROOT, """
822860
{"create":{}}
823861
{"a":%d,"b":%d,"c":%d,"d":%d,"e":%d}
824862
""", a, b, c, d, e));
863+
flush++;
864+
if (flush % 10_000 == 0) {
865+
bulk("manylongs", bulk.toString());
866+
bulk.setLength(0);
867+
logger.info(
868+
"flushing {}/{} to manylongs",
869+
flush,
870+
countPerLong * countPerLong * countPerLong * countPerLong * countPerLong
871+
);
872+
873+
}
825874
}
826875
}
827876
}
828877
}
829-
bulk("manylongs", bulk.toString());
830-
bulk.setLength(0);
831878
}
832879
initIndex("manylongs", bulk.toString());
833880
}

0 commit comments

Comments
 (0)