Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions changelog/unreleased/SOLR-18157-javabin-buffer.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
title: Optimize the size of the internal buffer of JavaBin codec to reduce the number of allocations. This reduces GC pressure on SolrJ client under high indexing load.
type: changed # Could be 'optimized' ??
authors:
- name: Pierre Salagnac
links:
- name: SOLR-18157
url: https://issues.apache.org/jira/browse/SOLR-18157

Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.bench.search;

import static org.apache.solr.bench.Docs.docs;
import static org.apache.solr.bench.generators.SourceDSL.integers;
import static org.apache.solr.bench.generators.SourceDSL.longs;
import static org.apache.solr.bench.generators.SourceDSL.strings;

import java.io.IOException;
import java.io.OutputStream;
import java.util.Iterator;
import java.util.function.Supplier;
import org.apache.commons.io.output.NullOutputStream;
import org.apache.solr.bench.Docs;
import org.apache.solr.client.solrj.request.JavaBinRequestWriter;
import org.apache.solr.client.solrj.request.RequestWriter;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.request.XMLRequestWriter;
import org.apache.solr.common.SolrInputDocument;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;

/**
* Benchmark for serialization of requests by the client. This only focuses on the serialization
* itself, ignoring sending the request over the network (and for sure ignoring processing the
* request).
*/
@Fork(value = 1)
@BenchmarkMode(Mode.Throughput)
@Warmup(time = 2, iterations = 1)
@Measurement(time = 5, iterations = 5)
@Threads(value = 1)
public class RequestWriters {

@State(Scope.Benchmark)
public static class BenchState {

@Param({"xml", "binary"})
String type;

@Param({"10", "100", "1000", "10000"})
int batchSize;

private final int docCount = 50000;

private Supplier<RequestWriter> writerSupplier;
private Iterator<SolrInputDocument> docIterator;

@Setup(Level.Trial)
public void setup() throws Exception {
preGenerateDocs();

switch (type) {
case "xml":
writerSupplier = XMLRequestWriter::new;
break;
case "javabin":
writerSupplier = JavaBinRequestWriter::new;
break;

default:
throw new Error("Unsupported type: " + type);
}
}

private void preGenerateDocs() throws Exception {
Docs docs =
docs()
.field("id", integers().incrementing())
.field(strings().basicLatinAlphabet().ofLengthBetween(10, 64))
.field(strings().basicLatinAlphabet().ofLengthBetween(10, 64))
.field(strings().basicLatinAlphabet().multi(312).ofLengthBetween(10, 64))
.field(strings().basicLatinAlphabet().multi(312).ofLengthBetween(10, 64))
.field(integers().all())
.field(integers().all())
.field(longs().all());

docs.preGenerate(docCount);
docIterator = docs.generatedDocsCircularIterator();
}
}

@Benchmark
public void writeUpdate(BenchState state) throws IOException {

OutputStream sink = NullOutputStream.INSTANCE;

UpdateRequest request = new UpdateRequest();
for (int i = 0; i < state.batchSize; i++) {
request.add(state.docIterator.next());
}

RequestWriter writer = state.writerSupplier.get();
writer.write(request, sink);
}
}
36 changes: 33 additions & 3 deletions solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,10 @@ public class JavaBinCodec implements PushWriter {
NAMED_LST = (byte) (6 << 5), // NamedList
EXTERN_STRING = (byte) (7 << 5);

private static final int MIN_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY = 512;
private static final int MAX_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY = 65536;

private static byte VERSION = 2;
private static final byte VERSION = 2;
private final ObjectResolver resolver;
protected FastOutputStream daos;
private StringCache stringCache;
Expand Down Expand Up @@ -1072,7 +1073,11 @@ public void writeStr(CharSequence s) throws IOException {
int maxSize = end * ByteUtils.MAX_UTF8_BYTES_PER_CHAR;

if (maxSize <= MAX_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY) {
if (bytes == null || bytes.length < maxSize) bytes = new byte[maxSize];
if (bytes == null || bytes.length < maxSize) {
int bufferSize = getBufferSize(maxSize);
bytes = new byte[bufferSize];
}

int sz = ByteUtils.UTF16toUTF8(s, 0, end, bytes, 0);
writeTag(STR, sz);
daos.write(bytes, 0, sz);
Expand Down Expand Up @@ -1105,7 +1110,10 @@ public CharSequence readStr(

private CharSequence _readStr(DataInputInputStream dis, StringCache stringCache, int sz)
throws IOException {
if (bytes == null || bytes.length < sz) bytes = new byte[sz];
if (bytes == null || bytes.length < sz) {
int bufferSize = getBufferSize(sz);
bytes = new byte[bufferSize];
}
dis.readFully(bytes, 0, sz);
if (stringCache != null) {
return stringCache.get(bytesRef.reset(bytes, 0, sz));
Expand All @@ -1116,6 +1124,28 @@ private CharSequence _readStr(DataInputInputStream dis, StringCache stringCache,
}
}

/**
* Compute the buffer size for given required size. This returns the next power of 2 that is
* greater than or equal to the given size.
*
* <p>This is a trade-off so we don't start with a useless too big buffer, but we don't do too
* many allocations.
*/
static int getBufferSize(int required) {

if (required < MIN_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY) {
return MIN_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY;
}

int oneBit = Integer.highestOneBit(required);

if (oneBit == required) {
return oneBit;
} else {
return oneBit << 1;
}
}

/////////// code to optimize reading UTF8
static final int MAX_UTF8_SZ = 1024 * 64; // too big strings can cause too much memory allocation
private Function<ByteArrayUtf8CharSequence, String> stringProvider;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,16 @@ public void testStringCaching() throws Exception {
assertSame(l1.get(1), l2.get(1));
}

@Test
public void testBufferSize() {
assertEquals(512, JavaBinCodec.getBufferSize(1));
assertEquals(512, JavaBinCodec.getBufferSize(200));
assertEquals(512, JavaBinCodec.getBufferSize(500));
assertEquals(512, JavaBinCodec.getBufferSize(512));
assertEquals(1024, JavaBinCodec.getBufferSize(513));
assertEquals(2048, JavaBinCodec.getBufferSize(1500));
}

public void genBinaryFiles() throws IOException {

Object data = generateAllDataTypes();
Expand Down
Loading