ifilonenko
diff --git a/‎common/unsafe/src/main/java/org/apache/spark/sql/catalyst/expressions/HiveHasher.java
Lines changed: 5 additions & 0 deletions b/‎common/unsafe/src/main/java/org/apache/spark/sql/catalyst/expressions/HiveHasher.java
Lines changed: 5 additions & 0 deletions
diff --git a/‎common/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java
Lines changed: 6 additions & 1 deletion b/‎common/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java
Lines changed: 6 additions & 1 deletion
diff --git a/‎common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
Lines changed: 2 additions & 2 deletions b/‎common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
Lines changed: 2 additions & 2 deletions
diff --git a/‎common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala
Lines changed: 1 addition & 1 deletion b/‎common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala
Lines changed: 1 addition & 1 deletion
diff --git a/‎core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java
Lines changed: 1 addition & 1 deletion b/‎core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java
Lines changed: 1 addition & 1 deletion
diff --git a/‎core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSortDataFormat.java
Lines changed: 1 addition & 1 deletion b/‎core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSortDataFormat.java
Lines changed: 1 addition & 1 deletion
diff --git a/‎core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala
Lines changed: 28 additions & 2 deletions b/‎core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala
Lines changed: 28 additions & 2 deletions
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.catalyst.expressions;
 
 import org.apache.spark.unsafe.memory.MemoryBlock;
+import org.apache.spark.unsafe.types.UTF8String;
 
 /**
  * Simulates Hive's hashing function from Hive v1.2.1
@@ -51,4 +52,8 @@ public static int hashUnsafeBytesBlock(MemoryBlock mb) {
   public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes) {
     return hashUnsafeBytesBlock(MemoryBlock.allocateFromObject(base, offset, lengthInBytes));
   }
+
+  /**
+   * Computes the Hive-compatible hash of a UTF8String by hashing the memory block
+   * returned from {@code str.getMemoryBlock()}.
+   *
+   * @param str the string to hash
+   * @return the result of {@link #hashUnsafeBytesBlock(MemoryBlock)} on the string's block
+   */
+  public static int hashUTF8String(UTF8String str) {
+    MemoryBlock bytes = str.getMemoryBlock();
+    return hashUnsafeBytesBlock(bytes);
+  }
 }
@@ -20,6 +20,7 @@
 import com.google.common.primitives.Ints;
 
 import org.apache.spark.unsafe.memory.MemoryBlock;
+import org.apache.spark.unsafe.types.UTF8String;
 
 /**
  * 32-bit Murmur3 hasher.  This is based on Guava's Murmur3_32HashFunction.
@@ -82,6 +83,10 @@ public static int hashUnsafeBytesBlock(MemoryBlock base, int seed) {
     return fmix(h1, lengthInBytes);
   }
 
+  /**
+   * Computes the seeded 32-bit Murmur3 hash of a UTF8String by hashing the memory
+   * block returned from {@code str.getMemoryBlock()}.
+   *
+   * @param str the string to hash
+   * @param seed the hash seed, passed through unchanged
+   * @return the result of {@code hashUnsafeBytesBlock} on the string's block and seed
+   */
+  public static int hashUTF8String(UTF8String str, int seed) {
+    MemoryBlock bytes = str.getMemoryBlock();
+    return hashUnsafeBytesBlock(bytes, seed);
+  }
+
   public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes, int seed) {
     return hashUnsafeBytesBlock(MemoryBlock.allocateFromObject(base, offset, lengthInBytes), seed);
   }
@@ -91,7 +96,7 @@ public static int hashUnsafeBytes2(Object base, long offset, int lengthInBytes,
   }
 
   public static int hashUnsafeBytes2Block(MemoryBlock base, int seed) {
-    // This is compatible with original and another implementations.
+    // This is compatible with original and other implementations.
     // Use this method for new components after Spark 2.3.
     int lengthInBytes = Ints.checkedCast(base.size());
     assert (lengthInBytes >= 0) : "lengthInBytes cannot be negative";
 
@@ -52,8 +52,8 @@ private static void checkBasic(String str, int len) {
 
     assertTrue(s1.contains(s2));
     assertTrue(s2.contains(s1));
-    assertTrue(s1.startsWith(s1));
-    assertTrue(s1.endsWith(s1));
+    assertTrue(s1.startsWith(s2));
+    assertTrue(s1.endsWith(s2));
   }
 
   @Test
 
@@ -164,7 +164,7 @@ class UTF8StringPropertyCheckSuite extends FunSuite with GeneratorDrivenProperty
     def padding(origin: String, pad: String, length: Int, isLPad: Boolean): String = {
       if (length <= 0) return ""
       if (length <= origin.length) {
-        if (length <= 0) "" else origin.substring(0, length)
+        origin.substring(0, length)
       } else {
         if (pad.length == 0) return origin
         val toPad = length - origin.length
 
@@ -124,7 +124,7 @@ public UnsafeInMemorySorter(
     int initialSize,
     boolean canUseRadixSort) {
     this(consumer, memoryManager, recordComparator, prefixComparator,
-      consumer.allocateArray(initialSize * 2), canUseRadixSort);
+      consumer.allocateArray(initialSize * 2L), canUseRadixSort);
   }
 
   public UnsafeInMemorySorter(
 
@@ -84,7 +84,7 @@ public void copyRange(LongArray src, int srcPos, LongArray dst, int dstPos, int
 
   @Override
   public LongArray allocate(int length) {
-    assert (length * 2 <= buffer.size()) :
-      "the buffer is smaller than required: " + buffer.size() + " < " + (length * 2);
+    // No allocation happens here: the existing buffer is returned once it is shown to be
+    // large enough. The required size is computed as a long in both the check and the
+    // message, so neither wraps around when length exceeds Integer.MAX_VALUE / 2.
+    assert (length * 2L <= buffer.size()) :
+      "the buffer is smaller than required: " + buffer.size() + " < " + (length * 2L);
     return buffer;
   }
 
@@ -25,9 +25,10 @@ import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileSystem, Path}
 
 import org.apache.spark.{SecurityManager, SparkConf, SparkException}
+import org.apache.spark.internal.Logging
 import org.apache.spark.util.{MutableURLClassLoader, Utils}
 
-private[deploy] object DependencyUtils {
+private[deploy] object DependencyUtils extends Logging {
 
   def resolveMavenDependencies(
       packagesExclusions: String,
@@ -75,7 +76,7 @@ private[deploy] object DependencyUtils {
   def addJarsToClassPath(jars: String, loader: MutableURLClassLoader): Unit = {
     if (jars != null) {
       for (jar <- jars.split(",")) {
-        SparkSubmit.addJarToClasspath(jar, loader)
+        addJarToClasspath(jar, loader)
       }
     }
   }
@@ -151,6 +152,31 @@ private[deploy] object DependencyUtils {
     }.mkString(",")
   }
 
+  /**
+   * Add a jar to the given class loader if it resolves to an existing local file.
+   * Jars whose URI scheme is neither "file" nor "local", and local jars that do not
+   * exist on disk, are skipped with a warning.
+   */
+  def addJarToClasspath(localJar: String, loader: MutableURLClassLoader): Unit = {
+    val uri = Utils.resolveURI(localJar)
+    val scheme = uri.getScheme
+    if (scheme == "file" || scheme == "local") {
+      val file = new File(uri.getPath)
+      if (!file.exists()) {
+        logWarning(s"Local jar $file does not exist, skipping.")
+      } else {
+        loader.addURL(file.toURI.toURL)
+      }
+    } else {
+      logWarning(s"Skip remote jar $uri.")
+    }
+  }
+
+  /**
+   * Combine several comma-separated file lists into a single comma-separated string.
+   * Lists that are null (meaning no files) or blank are ignored; the result is null
+   * when no entries remain after merging.
+   */
+  def mergeFileLists(lists: String*): String = {
+    val entries = for {
+      list <- lists if !StringUtils.isBlank(list)
+      entry <- Utils.stringToSeq(list)
+    } yield entry
+    if (entries.isEmpty) null else entries.mkString(",")
+  }
+
   private def splitOnFragment(path: String): (URI, Option[String]) = {
     val uri = Utils.resolveURI(path)
     val withoutFragment = new URI(uri.getScheme, uri.getSchemeSpecificPart, null)
Original file line number	Diff line number	Diff line change
`@@ -18,6 +18,7 @@`
`18`	`18`	`package org.apache.spark.sql.catalyst.expressions;`
`19`	`19`
`20`	`20`	`import org.apache.spark.unsafe.memory.MemoryBlock;`
	`21`	`+import org.apache.spark.unsafe.types.UTF8String;`
`21`	`22`
`22`	`23`	`/**`
`23`	`24`	`* Simulates Hive's hashing function from Hive v1.2.1`
`@@ -51,4 +52,8 @@ public static int hashUnsafeBytesBlock(MemoryBlock mb) {`
`51`	`52`	`public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes) {`
`52`	`53`	`return hashUnsafeBytesBlock(MemoryBlock.allocateFromObject(base, offset, lengthInBytes));`
`53`	`54`	`}`
	`55`	`+`
	`56`	`+ public static int hashUTF8String(UTF8String str) {`
	`57`	`+ return hashUnsafeBytesBlock(str.getMemoryBlock());`
	`58`	`+ }`
`54`	`59`	`}`
Original file line number	Diff line number	Diff line change
`@@ -52,8 +52,8 @@ private static void checkBasic(String str, int len) {`
`52`	`52`
`53`	`53`	`assertTrue(s1.contains(s2));`
`54`	`54`	`assertTrue(s2.contains(s1));`
`55`		`- assertTrue(s1.startsWith(s1));`
`56`		`- assertTrue(s1.endsWith(s1));`
	`55`	`+ assertTrue(s1.startsWith(s2));`
	`56`	`+ assertTrue(s1.endsWith(s2));`
`57`	`57`	`}`
`58`	`58`
`59`	`59`	`@Test`
Original file line number	Diff line number	Diff line change
`@@ -124,7 +124,7 @@ public UnsafeInMemorySorter(`
`124`	`124`	`int initialSize,`
`125`	`125`	`boolean canUseRadixSort) {`
`126`	`126`	`this(consumer, memoryManager, recordComparator, prefixComparator,`
`127`		`- consumer.allocateArray(initialSize * 2), canUseRadixSort);`
	`127`	`+ consumer.allocateArray(initialSize * 2L), canUseRadixSort);`
`128`	`128`	`}`
`129`	`129`
`130`	`130`	`public UnsafeInMemorySorter(`
Original file line number	Diff line number	Diff line change
`@@ -84,7 +84,7 @@ public void copyRange(LongArray src, int srcPos, LongArray dst, int dstPos, int`
`84`	`84`
`85`	`85`	`@Override`
`86`	`86`	`public LongArray allocate(int length) {`
`87`		`- assert (length * 2 <= buffer.size()) :`
	`87`	`+ assert (length * 2L <= buffer.size()) :`
`88`		`- "the buffer is smaller than required: " + buffer.size() + " < " + (length * 2);`
	`88`	`+ "the buffer is smaller than required: " + buffer.size() + " < " + (length * 2L);`
`89`	`89`	`return buffer;`
`90`	`90`	`}`