apache · psalagnac · Mar 11, 2026
diff --git a/changelog/unreleased/SOLR-18157-javabin-buffer.yml b/changelog/unreleased/SOLR-18157-javabin-buffer.yml
@@ -0,0 +1,8 @@
+title: Optimize the size of the internal buffer of JavaBin codec to reduce the number of allocations. This reduces GC pressure on SolrJ client under high indexing load.
+type: changed # Could be 'optimized' ??
+authors:
+  - name: Pierre Salagnac
+links:
+  - name: SOLR-18157
+    url: https://issues.apache.org/jira/browse/SOLR-18157
+
diff --git a/solr/benchmark/src/java/org/apache/solr/bench/search/RequestWriters.java b/solr/benchmark/src/java/org/apache/solr/bench/search/RequestWriters.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.bench.search;
+
+import static org.apache.solr.bench.Docs.docs;
+import static org.apache.solr.bench.generators.SourceDSL.integers;
+import static org.apache.solr.bench.generators.SourceDSL.longs;
+import static org.apache.solr.bench.generators.SourceDSL.strings;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Iterator;
+import java.util.function.Supplier;
+import org.apache.commons.io.output.NullOutputStream;
+import org.apache.solr.bench.Docs;
+import org.apache.solr.client.solrj.request.JavaBinRequestWriter;
+import org.apache.solr.client.solrj.request.RequestWriter;
+import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.client.solrj.request.XMLRequestWriter;
+import org.apache.solr.common.SolrInputDocument;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+
+/**
+ * Benchmark for serialization of requests by the client. This only focuses on the serialization
+ * itself, ignoring sending the request over the network (and for sure ignoring processing the
+ * request).
+ */
+@Fork(value = 1)
+@BenchmarkMode(Mode.Throughput)
+@Warmup(time = 2, iterations = 1)
+@Measurement(time = 5, iterations = 5)
+@Threads(value = 1)
+public class RequestWriters {
+
+  @State(Scope.Benchmark)
+  public static class BenchState {
+
+    @Param({"xml", "binary"})
+    String type;
+
+    @Param({"10", "100", "1000", "10000"})
+    int batchSize;
+
+    private final int docCount = 50000;
+
+    private Supplier<RequestWriter> writerSupplier;
+    private Iterator<SolrInputDocument> docIterator;
+
+    @Setup(Level.Trial)
+    public void setup() throws Exception {
+      preGenerateDocs();
+
+      switch (type) {
+        case "xml":
+          writerSupplier = XMLRequestWriter::new;
+          break;
+        case "javabin":
+          writerSupplier = JavaBinRequestWriter::new;
+          break;
+
+        default:
+          throw new Error("Unsupported type: " + type);
+      }
+    }
+
+    private void preGenerateDocs() throws Exception {
+      Docs docs =
+          docs()
+              .field("id", integers().incrementing())
+              .field(strings().basicLatinAlphabet().ofLengthBetween(10, 64))
+              .field(strings().basicLatinAlphabet().ofLengthBetween(10, 64))
+              .field(strings().basicLatinAlphabet().multi(312).ofLengthBetween(10, 64))
+              .field(strings().basicLatinAlphabet().multi(312).ofLengthBetween(10, 64))
+              .field(integers().all())
+              .field(integers().all())
+              .field(longs().all());
+
+      docs.preGenerate(docCount);
+      docIterator = docs.generatedDocsCircularIterator();
+    }
+  }
+
+  @Benchmark
+  public void writeUpdate(BenchState state) throws IOException {
+
+    OutputStream sink = NullOutputStream.INSTANCE;
+
+    UpdateRequest request = new UpdateRequest();
+    for (int i = 0; i < state.batchSize; i++) {
+      request.add(state.docIterator.next());
+    }
+
+    RequestWriter writer = state.writerSupplier.get();
+    writer.write(request, sink);
+  }
+}
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java b/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
@@ -113,9 +113,10 @@ public class JavaBinCodec implements PushWriter {
       NAMED_LST = (byte) (6 << 5), // NamedList
       EXTERN_STRING = (byte) (7 << 5);
 
+  private static final int MIN_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY = 512;
   private static final int MAX_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY = 65536;
 
-  private static byte VERSION = 2;
+  private static final byte VERSION = 2;
   private final ObjectResolver resolver;
   protected FastOutputStream daos;
   private StringCache stringCache;
@@ -1072,7 +1073,11 @@ public void writeStr(CharSequence s) throws IOException {
     int maxSize = end * ByteUtils.MAX_UTF8_BYTES_PER_CHAR;
 
     if (maxSize <= MAX_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY) {
-      if (bytes == null || bytes.length < maxSize) bytes = new byte[maxSize];
+      if (bytes == null || bytes.length < maxSize) {
+        int bufferSize = getBufferSize(maxSize);
+        bytes = new byte[bufferSize];
+      }
+
       int sz = ByteUtils.UTF16toUTF8(s, 0, end, bytes, 0);
       writeTag(STR, sz);
       daos.write(bytes, 0, sz);
@@ -1105,7 +1110,10 @@ public CharSequence readStr(
 
   private CharSequence _readStr(DataInputInputStream dis, StringCache stringCache, int sz)
       throws IOException {
-    if (bytes == null || bytes.length < sz) bytes = new byte[sz];
+    if (bytes == null || bytes.length < sz) {
+      int bufferSize = getBufferSize(sz);
+      bytes = new byte[bufferSize];
+    }
     dis.readFully(bytes, 0, sz);
     if (stringCache != null) {
       return stringCache.get(bytesRef.reset(bytes, 0, sz));
@@ -1116,6 +1124,28 @@ private CharSequence _readStr(DataInputInputStream dis, StringCache stringCache,
     }
   }
 
+  /**
+   * Compute the buffer size for given required size. This returns the next power of 2 that is
+   * greater than or equal to the given size.
+   *
+   * <p>This is a trade-off so we don't start with a useless too big buffer, but we don't do too
+   * many allocations.
+   */
+  static int getBufferSize(int required) {
+
+    if (required < MIN_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY) {
+      return MIN_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY;
+    }
+
+    int oneBit = Integer.highestOneBit(required);
+
+    if (oneBit == required) {
+      return oneBit;
+    } else {
+      return oneBit << 1;
+    }
+  }
+
   /////////// code to optimize reading UTF8
   static final int MAX_UTF8_SZ = 1024 * 64; // too big strings can cause too much memory allocation
   private Function<ByteArrayUtf8CharSequence, String> stringProvider;

diff --git a/solr/solrj/src/test/org/apache/solr/common/util/TestJavaBinCodec.java b/solr/solrj/src/test/org/apache/solr/common/util/TestJavaBinCodec.java
@@ -496,6 +496,16 @@ public void testStringCaching() throws Exception {
     assertSame(l1.get(1), l2.get(1));
   }
 
+  @Test
+  public void testBufferSize() {
+    assertEquals(512, JavaBinCodec.getBufferSize(1));
+    assertEquals(512, JavaBinCodec.getBufferSize(200));
+    assertEquals(512, JavaBinCodec.getBufferSize(500));
+    assertEquals(512, JavaBinCodec.getBufferSize(512));
+    assertEquals(1024, JavaBinCodec.getBufferSize(513));
+    assertEquals(2048, JavaBinCodec.getBufferSize(1500));
+  }
+
   public void genBinaryFiles() throws IOException {
 
     Object data = generateAllDataTypes();