Commit a2f502c
[SPARK-25565][BUILD] Add scalastyle rule to check add Locale.ROOT to .toLowerCase and .toUpperCase for internal calls
## What changes were proposed in this pull request?

This PR adds a Scalastyle rule that forces `.toLowerCase(Locale.ROOT)` or `.toUpperCase(Locale.ROOT)`. It produces an error as below:

```
[error] Are you sure that you want to use toUpperCase or toLowerCase without the root locale? In most cases, you
[error] should use toUpperCase(Locale.ROOT) or toLowerCase(Locale.ROOT) instead.
[error] If you must use toUpperCase or toLowerCase without the root locale, wrap the code block with
[error] // scalastyle:off caselocale
[error] .toUpperCase
[error] .toLowerCase
[error] // scalastyle:on caselocale
```

The rule is suppressed in the SQL code paths that handle external input such as table and column names. In test suites, and wherever it is clear there is no locale issue (such as the Turkish-locale problem), the calls are switched to `Locale.ROOT`.

One minor wrinkle: `UTF8String` defines its own `toLowerCase` and `toUpperCase` methods, which the new rule also flags; those call sites are suppressed.

## How was this patch tested?

Manually tested, and Jenkins tests.

Closes apache#22581 from HyukjinKwon/SPARK-25565.

Authored-by: hyukjinkwon <[email protected]>
Signed-off-by: hyukjinkwon <[email protected]>
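For context, a minimal sketch (not part of this commit; the object name is hypothetical) of the Turkish-locale pitfall the rule guards against:

```scala
import java.util.Locale

object CaseLocaleDemo {
  def main(args: Array[String]): Unit = {
    val turkish = new Locale("tr")
    // Turkish case rules use dotted/dotless I, changing the mappings:
    println("i".toUpperCase(turkish))         // "İ" (U+0130), not "I"
    println("TITLE".toLowerCase(turkish))     // "tıtle" with dotless ı (U+0131)
    // Locale.ROOT applies the locale-neutral mapping, safe for internal keys:
    println("TITLE".toLowerCase(Locale.ROOT)) // "title"
  }
}
```

With a Turkish default locale, a bare `"TITLE".toLowerCase` produces the dotless form, so string comparisons against internal constants silently fail.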
1 parent b6b8a66 commit a2f502c

File tree: 31 files changed, +132 −40 lines

common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala

Lines changed: 2 additions & 0 deletions

```diff
@@ -63,6 +63,7 @@ class UTF8StringPropertyCheckSuite extends FunSuite with GeneratorDrivenProperty
     }
   }
 
+  // scalastyle:off caselocale
   test("toUpperCase") {
     forAll { (s: String) =>
       assert(toUTF8(s).toUpperCase === toUTF8(s.toUpperCase))
@@ -74,6 +75,7 @@ class UTF8StringPropertyCheckSuite extends FunSuite with GeneratorDrivenProperty
       assert(toUTF8(s).toLowerCase === toUTF8(s.toLowerCase))
     }
   }
+  // scalastyle:on caselocale
 
   test("compare") {
     forAll { (s1: String, s2: String) =>
```

core/src/main/scala/org/apache/spark/metrics/sink/StatsdSink.scala

Lines changed: 3 additions & 2 deletions

```diff
@@ -17,7 +17,7 @@
 
 package org.apache.spark.metrics.sink
 
-import java.util.Properties
+import java.util.{Locale, Properties}
 import java.util.concurrent.TimeUnit
 
 import com.codahale.metrics.MetricRegistry
@@ -52,7 +52,8 @@ private[spark] class StatsdSink(
 
   val pollPeriod = property.getProperty(STATSD_KEY_PERIOD, STATSD_DEFAULT_PERIOD).toInt
   val pollUnit =
-    TimeUnit.valueOf(property.getProperty(STATSD_KEY_UNIT, STATSD_DEFAULT_UNIT).toUpperCase)
+    TimeUnit.valueOf(
+      property.getProperty(STATSD_KEY_UNIT, STATSD_DEFAULT_UNIT).toUpperCase(Locale.ROOT))
 
   val prefix = property.getProperty(STATSD_KEY_PREFIX, STATSD_DEFAULT_PREFIX)
```
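To see why the `StatsdSink` change matters, here is a hedged repro sketch (the object name and the unit string "milliseconds" are illustrative; the configured unit is not shown in this diff):

```scala
import java.util.Locale
import java.util.concurrent.TimeUnit

object StatsdUnitRepro {
  def main(args: Array[String]): Unit = {
    val turkish = new Locale("tr")
    // "milliseconds".toUpperCase(turkish) yields "MİLLİSECONDS" (dotted İ, U+0130),
    // which is not a TimeUnit constant, so valueOf would throw:
    // TimeUnit.valueOf("milliseconds".toUpperCase(turkish)) // IllegalArgumentException
    // The locale-neutral mapping produces a valid enum constant:
    println(TimeUnit.valueOf("milliseconds".toUpperCase(Locale.ROOT))) // MILLISECONDS
  }
}
```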

core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala

Lines changed: 2 additions & 1 deletion

```diff
@@ -35,7 +35,8 @@ import org.apache.spark.internal.Logging
  *
  *   val rdd: RDD[(String, Int)] = ...
  *   implicit val caseInsensitiveOrdering = new Ordering[String] {
- *     override def compare(a: String, b: String) = a.toLowerCase.compare(b.toLowerCase)
+ *     override def compare(a: String, b: String) =
+ *       a.toLowerCase(Locale.ROOT).compare(b.toLowerCase(Locale.ROOT))
  *   }
  *
  *   // Sort by key, using the above case insensitive ordering.
```

core/src/main/scala/org/apache/spark/util/Utils.scala

Lines changed: 1 addition & 1 deletion

```diff
@@ -2736,7 +2736,7 @@ private[spark] object Utils extends Logging {
     }
 
     val masterScheme = new URI(masterWithoutK8sPrefix).getScheme
-    val resolvedURL = masterScheme.toLowerCase match {
+    val resolvedURL = masterScheme.toLowerCase(Locale.ROOT) match {
       case "https" =>
         masterWithoutK8sPrefix
       case "http" =>
```

core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala

Lines changed: 2 additions & 2 deletions

```diff
@@ -19,7 +19,7 @@ package org.apache.spark.deploy.history
 
 import java.io._
 import java.nio.charset.StandardCharsets
-import java.util.Date
+import java.util.{Date, Locale}
 import java.util.concurrent.TimeUnit
 import java.util.zip.{ZipInputStream, ZipOutputStream}
 
@@ -834,7 +834,7 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc
     doThrow(new AccessControlException("Cannot read accessDenied file")).when(mockedFs).open(
       argThat(new ArgumentMatcher[Path]() {
         override def matches(path: Any): Boolean = {
-          path.asInstanceOf[Path].getName.toLowerCase == "accessdenied"
+          path.asInstanceOf[Path].getName.toLowerCase(Locale.ROOT) == "accessdenied"
         }
       }))
     val mockedProvider = spy(provider)
```

mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala

Lines changed: 2 additions & 0 deletions

```diff
@@ -118,7 +118,9 @@ class StopWordsRemover @Since("1.5.0") (@Since("1.5.0") override val uid: String
       }
     } else {
       val lc = new Locale($(locale))
+      // scalastyle:off caselocale
       val toLower = (s: String) => if (s != null) s.toLowerCase(lc) else s
+      // scalastyle:on caselocale
       val lowerStopWords = $(stopWords).map(toLower(_)).toSet
       udf { terms: Seq[String] =>
         terms.filter(s => !lowerStopWords.contains(toLower(s)))
```

mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala

Lines changed: 4 additions & 0 deletions

```diff
@@ -36,7 +36,9 @@ class Tokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
   def this() = this(Identifiable.randomUID("tok"))
 
   override protected def createTransformFunc: String => Seq[String] = {
+    // scalastyle:off caselocale
     _.toLowerCase.split("\\s")
+    // scalastyle:on caselocale
   }
 
   override protected def validateInputType(inputType: DataType): Unit = {
@@ -140,7 +142,9 @@ class RegexTokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
 
   override protected def createTransformFunc: String => Seq[String] = { originStr =>
     val re = $(pattern).r
+    // scalastyle:off caselocale
     val str = if ($(toLowercase)) originStr.toLowerCase() else originStr
+    // scalastyle:on caselocale
     val tokens = if ($(gaps)) re.split(str).toSeq else re.findAllIn(str).toSeq
     val minLength = $(minTokenLength)
     tokens.filter(_.length >= minLength)
```

resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala

Lines changed: 2 additions & 2 deletions

```diff
@@ -17,7 +17,7 @@
 package org.apache.spark.deploy.k8s.submit
 
 import java.io.StringWriter
-import java.util.{Collections, UUID}
+import java.util.{Collections, Locale, UUID}
 import java.util.Properties
 
 import io.fabric8.kubernetes.api.model._
@@ -260,7 +260,7 @@ private[spark] object KubernetesClientApplication {
     val launchTime = System.currentTimeMillis()
     s"$appName-$launchTime"
       .trim
-      .toLowerCase
+      .toLowerCase(Locale.ROOT)
       .replaceAll("\\s+", "-")
       .replaceAll("\\.", "-")
       .replaceAll("[^a-z0-9\\-]", "")
```

resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala

Lines changed: 3 additions & 1 deletion

```diff
@@ -16,6 +16,8 @@
  */
 package org.apache.spark.scheduler.cluster.k8s
 
+import java.util.Locale
+
 import io.fabric8.kubernetes.api.model.Pod
 
 import org.apache.spark.deploy.k8s.Constants._
@@ -52,7 +54,7 @@ object ExecutorPodsSnapshot extends Logging {
     if (isDeleted(pod)) {
       PodDeleted(pod)
     } else {
-      val phase = pod.getStatus.getPhase.toLowerCase
+      val phase = pod.getStatus.getPhase.toLowerCase(Locale.ROOT)
       phase match {
         case "pending" =>
           PodPending(pod)
```
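The pod-phase match has the same failure mode; a sketch assuming an upper-cased phase string (the casing here is illustrative, not necessarily what the Kubernetes API returns):

```scala
import java.util.Locale

object PodPhaseRepro {
  def main(args: Array[String]): Unit = {
    val turkish = new Locale("tr")
    val phase = "PENDING" // illustrative casing
    // Turkish lower-casing maps 'I' to dotless 'ı' (U+0131), so the match fails:
    println(phase.toLowerCase(turkish) == "pending")     // false ("pendıng")
    println(phase.toLowerCase(Locale.ROOT) == "pending") // true
  }
}
```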

resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosClusterDispatcher.scala

Lines changed: 2 additions & 1 deletion

```diff
@@ -17,6 +17,7 @@
 
 package org.apache.spark.deploy.mesos
 
+import java.util.Locale
 import java.util.concurrent.CountDownLatch
 
 import org.apache.spark.{SecurityManager, SparkConf, SparkException}
@@ -60,7 +61,7 @@ private[mesos] class MesosClusterDispatcher(
   }
 
   private val publicAddress = Option(conf.getenv("SPARK_PUBLIC_DNS")).getOrElse(args.host)
-  private val recoveryMode = conf.get(RECOVERY_MODE).toUpperCase()
+  private val recoveryMode = conf.get(RECOVERY_MODE).toUpperCase(Locale.ROOT)
   logInfo("Recovery mode in Mesos dispatcher set to: " + recoveryMode)
 
   private val engineFactory = recoveryMode match {
```
