Optimize AbstractStatementParser.statementStartsWith

toddlipcon · toddlipcon · commit 63cf1e352d61 · 2025-04-07T21:35:00.000-07:00
I found this was taking ~25% of pgadapter's CPU time when running
the TPC-C benchmark loader, which seems to generate very large DML statements.

Previously, it would call split() over the whole string with
a limit. Now, it uses Guava's lazy splitter so that it doesn't
have to copy the remainder of the string following the second
match.

For whatever reason, it seems like the previous implementation
was doing something much more expensive than just copying the
tail. For a 100 KB query text, the new implementation is roughly
1600x faster. For short queries it's only a few times faster.

Before:
Benchmark                                Mode  Cnt        Score        Error  Units
StatementParserBenchmark.isQueryTest    thrpt    5  1461962.835 ± 340237.573  ops/s
StatementParserBenchmark.longQueryTest  thrpt    5     2873.150 ±    490.611  ops/s

After:

Benchmark                                Mode  Cnt        Score        Error  Units
StatementParserBenchmark.isQueryTest    thrpt    5  4765215.378 ± 132661.232  ops/s
StatementParserBenchmark.longQueryTest  thrpt    5  4671884.683 ± 486566.506  ops/s
diff --git a/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/AbstractStatementParser.java b/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/AbstractStatementParser.java
@@ -31,6 +31,7 @@
 import com.google.cloud.spanner.connection.UnitOfWork.CallType;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
+import com.google.common.base.Splitter;
 import com.google.common.cache.Cache;
 import com.google.common.cache.CacheBuilder;
 import com.google.common.cache.CacheStats;
@@ -41,6 +42,7 @@
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Set;
@@ -614,16 +616,20 @@ public boolean isUpdateStatement(String sql) {
 
   private boolean statementStartsWith(String sql, Iterable<String> checkStatements) {
     Preconditions.checkNotNull(sql);
-    String[] tokens = sql.split("\\s+", 2);
-    int checkIndex = 0;
-    if (supportsExplain() && tokens[0].equalsIgnoreCase("EXPLAIN")) {
-      checkIndex = 1;
-    }
-    if (tokens.length > checkIndex) {
-      for (String check : checkStatements) {
-        if (tokens[checkIndex].equalsIgnoreCase(check)) {
-          return true;
-        }
+    Iterator<String> tokens = Splitter.onPattern("\\s+").split(sql).iterator();
+    if (!tokens.hasNext()) {
+      return false;
+    }
+    String token = tokens.next();
+    if (supportsExplain() && token.equalsIgnoreCase("EXPLAIN")) {
+      if (!tokens.hasNext()) {
+        return false;
+      }
+      token = tokens.next();
+    }
+    for (String check : checkStatements) {
+      if (token.equalsIgnoreCase(check)) {
+        return true;
       }
     }
     return false;