Skip to content

Commit f217322

Browse files
authored
Estimate segment field usages (#112760) (#112777)
We have introduced a new memory estimation method in serverless, based on the number of segments and the fields within them. This new approach works well overall, but it still falls short when most fields are used in more than one way - for example, in both doc_values and postings, or in both doc_values and points. This change exposes the total number of field usages across segments, allowing us to adjust the memory estimate for these cases.
1 parent b9662b5 commit f217322

File tree

5 files changed

+74
-4
lines changed

5 files changed

+74
-4
lines changed

server/src/main/java/org/elasticsearch/index/codec/DeduplicatingFieldInfosFormat.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segm
6565
fi.isParentField()
6666
);
6767
}
68-
return new FieldInfos(deduplicated);
68+
return new FieldInfosWithUsages(deduplicated);
6969
}
7070

7171
private static Map<String, String> internStringStringMap(Map<String, String> m) {
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.index.codec;
10+
11+
import org.apache.lucene.index.DocValuesType;
12+
import org.apache.lucene.index.FieldInfo;
13+
import org.apache.lucene.index.FieldInfos;
14+
import org.apache.lucene.index.IndexOptions;
15+
16+
public class FieldInfosWithUsages extends FieldInfos {
17+
private final int totalUsages;
18+
19+
public FieldInfosWithUsages(FieldInfo[] infos) {
20+
super(infos);
21+
this.totalUsages = computeUsages(infos);
22+
}
23+
24+
public static int computeUsages(FieldInfo[] infos) {
25+
int usages = 0;
26+
for (FieldInfo fi : infos) {
27+
if (fi.getIndexOptions() != IndexOptions.NONE) {
28+
usages++;
29+
}
30+
if (fi.hasNorms()) {
31+
usages++;
32+
}
33+
if (fi.getDocValuesType() != DocValuesType.NONE) {
34+
usages++;
35+
}
36+
if (fi.getPointDimensionCount() > 0) {
37+
usages++;
38+
}
39+
if (fi.getVectorDimension() > 0) {
40+
usages++;
41+
}
42+
}
43+
return usages;
44+
}
45+
46+
public int getTotalUsages() {
47+
return totalUsages;
48+
}
49+
}

server/src/main/java/org/elasticsearch/index/shard/IndexShard.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
import org.elasticsearch.index.cache.query.TrivialQueryCachingPolicy;
8484
import org.elasticsearch.index.cache.request.ShardRequestCache;
8585
import org.elasticsearch.index.codec.CodecService;
86+
import org.elasticsearch.index.codec.FieldInfosWithUsages;
8687
import org.elasticsearch.index.engine.CommitStats;
8788
import org.elasticsearch.index.engine.Engine;
8889
import org.elasticsearch.index.engine.Engine.GetResult;
@@ -4093,11 +4094,20 @@ public void afterRefresh(boolean didRefresh) {
40934094
try (var searcher = getEngine().acquireSearcher("shard_field_stats", Engine.SearcherScope.INTERNAL)) {
40944095
int numSegments = 0;
40954096
int totalFields = 0;
4097+
long usages = 0;
40964098
for (LeafReaderContext leaf : searcher.getLeafContexts()) {
40974099
numSegments++;
4098-
totalFields += leaf.reader().getFieldInfos().size();
4100+
var fieldInfos = leaf.reader().getFieldInfos();
4101+
totalFields += fieldInfos.size();
4102+
if (fieldInfos instanceof FieldInfosWithUsages ft) {
4103+
if (usages != -1) {
4104+
usages += ft.getTotalUsages();
4105+
}
4106+
} else {
4107+
usages = -1;
4108+
}
40994109
}
4100-
shardFieldStats = new ShardFieldStats(numSegments, totalFields);
4110+
shardFieldStats = new ShardFieldStats(numSegments, totalFields, usages);
41014111
} catch (AlreadyClosedException ignored) {
41024112

41034113
}

server/src/main/java/org/elasticsearch/index/shard/ShardFieldStats.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
*
1515
* @param numSegments the number of segments
1616
* @param totalFields the total number of fields across the segments
17+
* @param fieldUsages the number of usages for segment-level fields (e.g., doc_values, postings, norms, points)
18+
* or -1 if the usages are unavailable for at least one segment
1719
*/
18-
public record ShardFieldStats(int numSegments, int totalFields) {
20+
public record ShardFieldStats(int numSegments, int totalFields, long fieldUsages) {
1921

2022
}

server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1793,6 +1793,7 @@ public void testShardFieldStats() throws IOException {
17931793
assertNotNull(stats);
17941794
assertThat(stats.numSegments(), equalTo(0));
17951795
assertThat(stats.totalFields(), equalTo(0));
1796+
assertThat(stats.fieldUsages(), equalTo(0L));
17961797
// index some documents
17971798
int numDocs = between(1, 10);
17981799
for (int i = 0; i < numDocs; i++) {
@@ -1809,6 +1810,9 @@ public void testShardFieldStats() throws IOException {
18091810
assertThat(stats.numSegments(), equalTo(1));
18101811
// _id, _source, _version, _primary_term, _seq_no, f1, f1.keyword, f2, f2.keyword,
18111812
assertThat(stats.totalFields(), equalTo(9));
1813+
// _id(term), _source(0), _version(dv), _primary_term(dv), _seq_no(point,dv), f1(postings,norms),
1814+
// f1.keyword(term,dv), f2(postings,norms), f2.keyword(term,dv),
1815+
assertThat(stats.fieldUsages(), equalTo(13L));
18121816
// don't re-compute on refresh without change
18131817
if (randomBoolean()) {
18141818
shard.refresh("test");
@@ -1838,10 +1842,15 @@ public void testShardFieldStats() throws IOException {
18381842
assertThat(stats.numSegments(), equalTo(2));
18391843
// 9 + _id, _source, _version, _primary_term, _seq_no, f1, f1.keyword, f2, f2.keyword, f3, f3.keyword
18401844
assertThat(stats.totalFields(), equalTo(21));
1845+
// first segment: 13; second segment: 13 + f3(postings,norms) + f3.keyword(term,dv) = 17; plus 1 for __soft_deletes, which is added to the previous segment
1846+
assertThat(stats.fieldUsages(), equalTo(31L));
18411847
shard.forceMerge(new ForceMergeRequest().maxNumSegments(1).flush(true));
18421848
stats = shard.getShardFieldStats();
18431849
assertThat(stats.numSegments(), equalTo(1));
18441850
assertThat(stats.totalFields(), equalTo(12));
1851+
// _id(term), _source(0), _version(dv), _primary_term(dv), _seq_no(point,dv), f1(postings,norms),
1852+
// f1.keyword(term,dv), f2(postings,norms), f2.keyword(term,dv), f3(postings,norms), f3.keyword(term,dv), __soft_deletes
1853+
assertThat(stats.fieldUsages(), equalTo(18L));
18451854
closeShards(shard);
18461855
}
18471856

0 commit comments

Comments
 (0)