Skip to content

Commit 95abf2f

Browse files
committed
[SPARK-53010][MLLIB][YARN] Ban com.google.common.base.Strings
### What changes were proposed in this pull request?

This PR aims to ban `com.google.common.base.Strings` in favor of Java's built-in methods and `SparkStringUtils`.

### Why are the changes needed?

- `Strings.repeat`: Java 11+ supports a **faster** `String.repeat` natively.

```scala
scala> spark.time("a".repeat(1_000_000_000).length)
Time taken: 83 ms
val res0: Int = 1000000000

scala> spark.time(com.google.common.base.Strings.repeat("a", 1_000_000_000).length)
Time taken: 397 ms
val res1: Int = 1000000000
```

- `Strings.isNullOrEmpty`: `SparkStringUtils` already supports `isEmpty`.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Pass the CIs.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #51713 from dongjoon-hyun/SPARK-53010.

Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 783beb6 commit 95abf2f

File tree

4 files changed

+9
-5
lines changed

4 files changed

+9
-5
lines changed

dev/checkstyle.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@
185185
<property name="illegalPkgs" value="org.apache.commons.lang" />
186186
<property name="illegalPkgs" value="org.apache.commons.lang3.tuple" />
187187
<property name="illegalClasses" value="org.apache.commons.lang3.JavaVersion" />
188+
<property name="illegalClasses" value="com.google.common.base.Strings" />
188189
</module>
189190
<module name="RegexpSinglelineJava">
190191
<property name="format" value="new URL\("/>

mllib/src/test/java/org/apache/spark/mllib/feature/JavaWord2VecSuite.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@
2020
import java.util.Arrays;
2121
import java.util.List;
2222

23-
import com.google.common.base.Strings;
24-
2523
import scala.Tuple2;
2624

2725
import org.junit.jupiter.api.Assertions;
@@ -35,7 +33,7 @@ public class JavaWord2VecSuite extends SharedSparkSession {
3533
@Test
3634
public void word2Vec() {
3735
// The tests are to check Java compatibility.
38-
String sentence = Strings.repeat("a b ", 100) + Strings.repeat("a c ", 10);
36+
String sentence = "a b ".repeat(100) + "a c ".repeat(10);
3937
List<String> words = Arrays.asList(sentence.split(" "));
4038
List<List<String>> localDoc = Arrays.asList(words, words);
4139
JavaRDD<List<String>> doc = jsc.parallelize(localDoc);

resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/SparkRackResolver.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,14 @@ package org.apache.spark.deploy.yarn
2020
import scala.collection.mutable.ArrayBuffer
2121
import scala.jdk.CollectionConverters._
2222

23-
import com.google.common.base.Strings
2423
import org.apache.hadoop.conf.Configuration
2524
import org.apache.hadoop.fs.CommonConfigurationKeysPublic
2625
import org.apache.hadoop.net._
2726
import org.apache.hadoop.util.ReflectionUtils
2827

2928
import org.apache.spark.internal.{Logging, MDC}
3029
import org.apache.spark.internal.LogKeys.NODE_LOCATION
30+
import org.apache.spark.util.SparkStringUtils
3131

3232
/**
3333
* Re-implement YARN's [[RackResolver]] for hadoop releases without YARN-9332.
@@ -73,7 +73,7 @@ private[spark] class SparkRackResolver(conf: Configuration) extends Logging {
7373
log"Falling back to ${MDC(NODE_LOCATION, NetworkTopology.DEFAULT_RACK)} for all")
7474
} else {
7575
for ((hostName, rName) <- hostNames.zip(rNameList)) {
76-
if (Strings.isNullOrEmpty(rName)) {
76+
if (SparkStringUtils.isEmpty(rName)) {
7777
nodes += new NodeBase(hostName, NetworkTopology.DEFAULT_RACK)
7878
logDebug(s"Could not resolve $hostName. " +
7979
s"Falling back to ${NetworkTopology.DEFAULT_RACK}")

scalastyle-config.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -511,4 +511,9 @@ This file is divided into 3 sections:
511511
<parameters><parameter name="regex">buildConf\("spark.databricks.</parameter></parameters>
512512
<customMessage>Use Apache Spark config namespace.</customMessage>
513513
</check>
514+
515+
<check customId="googleStrings" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
516+
<parameters><parameter name="regex">com\.google\.common\.base\.Strings</parameter></parameters>
517+
<customMessage>Use Java built-in methods or SparkStringUtils instead</customMessage>
518+
</check>
514519
</scalastyle>

0 commit comments

Comments (0)