Skip to content

Commit 0d8e02f

Browse files
committed
Add TrackingPostingsInMemoryBytesCodec
Mostly copied from Nhat's implementation in elastic#121476
1 parent caae426 commit 0d8e02f

File tree

1 file changed

+139
-0
lines changed

1 file changed

+139
-0
lines changed
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.codec;
11+
12+
import org.apache.lucene.codecs.Codec;
13+
import org.apache.lucene.codecs.FieldsConsumer;
14+
import org.apache.lucene.codecs.FieldsProducer;
15+
import org.apache.lucene.codecs.FilterCodec;
16+
import org.apache.lucene.codecs.NormsProducer;
17+
import org.apache.lucene.codecs.PostingsFormat;
18+
import org.apache.lucene.index.FieldInfos;
19+
import org.apache.lucene.index.Fields;
20+
import org.apache.lucene.index.FilterLeafReader;
21+
import org.apache.lucene.index.SegmentReadState;
22+
import org.apache.lucene.index.SegmentWriteState;
23+
import org.apache.lucene.index.Terms;
24+
import org.apache.lucene.index.TermsEnum;
25+
import org.apache.lucene.internal.hppc.IntIntHashMap;
26+
import org.apache.lucene.util.BytesRef;
27+
28+
import java.io.IOException;
29+
import java.util.function.IntConsumer;
30+
31+
public class TrackingPostingsInMemoryBytesCodec extends FilterCodec {
32+
public static final String IN_MEMORY_POSTINGS_BYTES_KEY = "es.postings.in_memory_bytes";
33+
34+
public TrackingPostingsInMemoryBytesCodec(Codec delegate) {
35+
super(delegate.getName(), delegate);
36+
}
37+
38+
@Override
39+
public PostingsFormat postingsFormat() {
40+
PostingsFormat format = super.postingsFormat();
41+
42+
return new PostingsFormat(format.getName()) {
43+
@Override
44+
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
45+
FieldsConsumer consumer = format.fieldsConsumer(state);
46+
return new TrackingLengthFieldsConsumer(state, consumer);
47+
}
48+
49+
@Override
50+
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
51+
return format.fieldsProducer(state);
52+
}
53+
};
54+
}
55+
56+
static final class TrackingLengthFieldsConsumer extends FieldsConsumer {
57+
final SegmentWriteState state;
58+
final FieldsConsumer in;
59+
final IntIntHashMap maxLengths;
60+
61+
TrackingLengthFieldsConsumer(SegmentWriteState state, FieldsConsumer in) {
62+
this.state = state;
63+
this.in = in;
64+
this.maxLengths = new IntIntHashMap(state.fieldInfos.size());
65+
}
66+
67+
@Override
68+
public void write(Fields fields, NormsProducer norms) throws IOException {
69+
in.write(new TrackingLengthFields(fields, maxLengths, state.fieldInfos), norms);
70+
long totalLength = 0;
71+
for (int len : maxLengths.values) {
72+
totalLength += len; // minTerm
73+
totalLength += len; // maxTerm
74+
}
75+
state.segmentInfo.putAttribute(IN_MEMORY_POSTINGS_BYTES_KEY, Long.toString(totalLength));
76+
}
77+
78+
@Override
79+
public void close() throws IOException {
80+
in.close();
81+
}
82+
}
83+
84+
static final class TrackingLengthFields extends FilterLeafReader.FilterFields {
85+
final IntIntHashMap maxLengths;
86+
final FieldInfos fieldInfos;
87+
88+
TrackingLengthFields(Fields in, IntIntHashMap maxLengths, FieldInfos fieldInfos) {
89+
super(in);
90+
this.maxLengths = maxLengths;
91+
this.fieldInfos = fieldInfos;
92+
}
93+
94+
@Override
95+
public Terms terms(String field) throws IOException {
96+
Terms terms = super.terms(field);
97+
if (terms == null) {
98+
return terms;
99+
}
100+
int fieldNum = fieldInfos.fieldInfo(field).number;
101+
return new TrackingLengthTerms(terms, len -> maxLengths.put(fieldNum, Math.max(maxLengths.getOrDefault(fieldNum, 0), len)));
102+
}
103+
}
104+
105+
static final class TrackingLengthTerms extends FilterLeafReader.FilterTerms {
106+
final IntConsumer onFinish;
107+
108+
TrackingLengthTerms(Terms in, IntConsumer onFinish) {
109+
super(in);
110+
this.onFinish = onFinish;
111+
}
112+
113+
@Override
114+
public TermsEnum iterator() throws IOException {
115+
return new TrackingLengthTermsEnum(super.iterator(), onFinish);
116+
}
117+
}
118+
119+
static final class TrackingLengthTermsEnum extends FilterLeafReader.FilterTermsEnum {
120+
int maxTermLength = 0;
121+
final IntConsumer onFinish;
122+
123+
TrackingLengthTermsEnum(TermsEnum in, IntConsumer onFinish) {
124+
super(in);
125+
this.onFinish = onFinish;
126+
}
127+
128+
@Override
129+
public BytesRef next() throws IOException {
130+
final BytesRef term = super.next();
131+
if (term != null) {
132+
maxTermLength = Math.max(maxTermLength, term.length);
133+
} else {
134+
onFinish.accept(maxTermLength);
135+
}
136+
return term;
137+
}
138+
}
139+
}

0 commit comments

Comments
 (0)