Skip to content

Commit 7bd5cb1

Browse files
committed
SOLR-18157: Optimize buffer allocation in JavaBinCodec
Implements a smarter allocation strategy using powers of 2 for the internal buffer of JavaBinCodec.
1 parent 4af06a9 commit 7bd5cb1

File tree

4 files changed

+172
-3
lines changed

4 files changed

+172
-3
lines changed
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
title: Optimize the size of the internal buffer of the JavaBin codec to reduce the number of allocations. This reduces GC pressure on the SolrJ client under high indexing load.
2+
type: changed # Could be 'optimized' ??
3+
authors:
4+
- name: Pierre Salagnac
5+
links:
6+
- name: SOLR-18157
7+
url: https://issues.apache.org/jira/browse/SOLR-18157
8+
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.solr.bench.search;
18+
19+
import static org.apache.solr.bench.Docs.docs;
20+
import static org.apache.solr.bench.generators.SourceDSL.integers;
21+
import static org.apache.solr.bench.generators.SourceDSL.longs;
22+
import static org.apache.solr.bench.generators.SourceDSL.strings;
23+
24+
import java.io.IOException;
25+
import java.io.OutputStream;
26+
import java.util.Iterator;
27+
import java.util.function.Supplier;
28+
import org.apache.commons.io.output.NullOutputStream;
29+
import org.apache.solr.bench.Docs;
30+
import org.apache.solr.client.solrj.request.JavaBinRequestWriter;
31+
import org.apache.solr.client.solrj.request.RequestWriter;
32+
import org.apache.solr.client.solrj.request.UpdateRequest;
33+
import org.apache.solr.client.solrj.request.XMLRequestWriter;
34+
import org.apache.solr.common.SolrInputDocument;
35+
import org.openjdk.jmh.annotations.Benchmark;
36+
import org.openjdk.jmh.annotations.BenchmarkMode;
37+
import org.openjdk.jmh.annotations.Fork;
38+
import org.openjdk.jmh.annotations.Level;
39+
import org.openjdk.jmh.annotations.Measurement;
40+
import org.openjdk.jmh.annotations.Mode;
41+
import org.openjdk.jmh.annotations.Param;
42+
import org.openjdk.jmh.annotations.Scope;
43+
import org.openjdk.jmh.annotations.Setup;
44+
import org.openjdk.jmh.annotations.State;
45+
import org.openjdk.jmh.annotations.Threads;
46+
import org.openjdk.jmh.annotations.Warmup;
47+
48+
/**
49+
* Benchmark for serialization of requests by the client. This only focuses on the serialization
50+
* itself, ignoring sending the request over the network (and for sure ignoring processing the
51+
* request).
52+
*/
53+
@Fork(value = 1)
54+
@BenchmarkMode(Mode.Throughput)
55+
@Warmup(time = 2, iterations = 1)
56+
@Measurement(time = 5, iterations = 5)
57+
@Threads(value = 1)
58+
public class RequestWriters {
59+
60+
@State(Scope.Benchmark)
61+
public static class BenchState {
62+
63+
@Param({"xml", "binary"})
64+
String type;
65+
66+
@Param({"10", "100", "1000", "10000"})
67+
int batchSize;
68+
69+
private final int docCount = 50000;
70+
71+
private Supplier<RequestWriter> writerSupplier;
72+
private Iterator<SolrInputDocument> docIterator;
73+
74+
@Setup(Level.Trial)
75+
public void setup() throws Exception {
76+
preGenerateDocs();
77+
78+
switch (type) {
79+
case "xml":
80+
writerSupplier = XMLRequestWriter::new;
81+
break;
82+
case "javabin":
83+
writerSupplier = JavaBinRequestWriter::new;
84+
break;
85+
86+
default:
87+
throw new Error("Unsupported type: " + type);
88+
}
89+
}
90+
91+
private void preGenerateDocs() throws Exception {
92+
Docs docs =
93+
docs()
94+
.field("id", integers().incrementing())
95+
.field(strings().basicLatinAlphabet().ofLengthBetween(10, 64))
96+
.field(strings().basicLatinAlphabet().ofLengthBetween(10, 64))
97+
.field(strings().basicLatinAlphabet().multi(312).ofLengthBetween(10, 64))
98+
.field(strings().basicLatinAlphabet().multi(312).ofLengthBetween(10, 64))
99+
.field(integers().all())
100+
.field(integers().all())
101+
.field(longs().all());
102+
103+
docs.preGenerate(docCount);
104+
docIterator = docs.generatedDocsCircularIterator();
105+
}
106+
}
107+
108+
@Benchmark
109+
public void writeUpdate(BenchState state) throws IOException {
110+
111+
OutputStream sink = NullOutputStream.INSTANCE;
112+
113+
UpdateRequest request = new UpdateRequest();
114+
for (int i = 0; i < state.batchSize; i++) {
115+
request.add(state.docIterator.next());
116+
}
117+
118+
RequestWriter writer = state.writerSupplier.get();
119+
writer.write(request, sink);
120+
}
121+
}

solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,9 +113,10 @@ public class JavaBinCodec implements PushWriter {
113113
NAMED_LST = (byte) (6 << 5), // NamedList
114114
EXTERN_STRING = (byte) (7 << 5);
115115

116+
private static final int MIN_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY = 512;
116117
private static final int MAX_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY = 65536;
117118

118-
private static byte VERSION = 2;
119+
private static final byte VERSION = 2;
119120
private final ObjectResolver resolver;
120121
protected FastOutputStream daos;
121122
private StringCache stringCache;
@@ -1072,7 +1073,11 @@ public void writeStr(CharSequence s) throws IOException {
10721073
int maxSize = end * ByteUtils.MAX_UTF8_BYTES_PER_CHAR;
10731074

10741075
if (maxSize <= MAX_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY) {
1075-
if (bytes == null || bytes.length < maxSize) bytes = new byte[maxSize];
1076+
if (bytes == null || bytes.length < maxSize) {
1077+
int bufferSize = getBufferSize(maxSize);
1078+
bytes = new byte[bufferSize];
1079+
}
1080+
10761081
int sz = ByteUtils.UTF16toUTF8(s, 0, end, bytes, 0);
10771082
writeTag(STR, sz);
10781083
daos.write(bytes, 0, sz);
@@ -1105,7 +1110,10 @@ public CharSequence readStr(
11051110

11061111
private CharSequence _readStr(DataInputInputStream dis, StringCache stringCache, int sz)
11071112
throws IOException {
1108-
if (bytes == null || bytes.length < sz) bytes = new byte[sz];
1113+
if (bytes == null || bytes.length < sz) {
1114+
int bufferSize = getBufferSize(sz);
1115+
bytes = new byte[bufferSize];
1116+
}
11091117
dis.readFully(bytes, 0, sz);
11101118
if (stringCache != null) {
11111119
return stringCache.get(bytesRef.reset(bytes, 0, sz));
@@ -1116,6 +1124,28 @@ private CharSequence _readStr(DataInputInputStream dis, StringCache stringCache,
11161124
}
11171125
}
11181126

1127+
/**
1128+
* Compute the buffer size for given required size. This returns the next power of 2 that is
1129+
* greater than or equal to the given size.
1130+
*
1131+
* <p>This is a trade-off so we don't start with a useless too big buffer, but we don't do too
1132+
* many allocations.
1133+
*/
1134+
static int getBufferSize(int required) {
1135+
1136+
if (required < MIN_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY) {
1137+
return MIN_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY;
1138+
}
1139+
1140+
int oneBit = Integer.highestOneBit(required);
1141+
1142+
if (oneBit == required) {
1143+
return oneBit;
1144+
} else {
1145+
return oneBit << 1;
1146+
}
1147+
}
1148+
11191149
/////////// code to optimize reading UTF8
11201150
static final int MAX_UTF8_SZ = 1024 * 64; // too big strings can cause too much memory allocation
11211151
private Function<ByteArrayUtf8CharSequence, String> stringProvider;

solr/solrj/src/test/org/apache/solr/common/util/TestJavaBinCodec.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,16 @@ public void testStringCaching() throws Exception {
496496
assertSame(l1.get(1), l2.get(1));
497497
}
498498

499+
@Test
500+
public void testBufferSize() {
501+
assertEquals(512, JavaBinCodec.getBufferSize(1));
502+
assertEquals(512, JavaBinCodec.getBufferSize(200));
503+
assertEquals(512, JavaBinCodec.getBufferSize(500));
504+
assertEquals(512, JavaBinCodec.getBufferSize(512));
505+
assertEquals(1024, JavaBinCodec.getBufferSize(513));
506+
assertEquals(2048, JavaBinCodec.getBufferSize(1500));
507+
}
508+
499509
public void genBinaryFiles() throws IOException {
500510

501511
Object data = generateAllDataTypes();

0 commit comments

Comments
 (0)