
Commit d54d87c

Merge branch 'master' into SPARK-55551-improve-broadcasthashjoinexec-output-partitioning
2 parents: c64aa9a + b976e7b

File tree: 210 files changed (+5645, -2983 lines)


.asf.yaml

Lines changed: 2 additions & 0 deletions
@@ -18,6 +18,8 @@
 github:
   description: "Apache Spark - A unified analytics engine for large-scale data processing"
   homepage: https://spark.apache.org/
+  features:
+    issues: true
   labels:
     - python
     - scala

.github/workflows/build_and_test.yml

Lines changed: 31 additions & 4 deletions
@@ -1104,11 +1104,14 @@ jobs:
       - name: List Python packages for branch-3.5 and branch-4.0
         if: inputs.branch == 'branch-3.5' || inputs.branch == 'branch-4.0'
         run: python3.9 -m pip list
+      - name: List Python packages for branch-4.1
+        if: inputs.branch == 'branch-4.1'
+        run: python3.11 -m pip list
       - name: List Python packages
-        if: inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0'
+        if: inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0' && inputs.branch != 'branch-4.1'
         run: |
           lsb_release -a
-          python3.11 -m pip list
+          python3.12 -m pip list
       - name: Install dependencies for documentation generation
         run: |
           # Keep the version of Bundler here in sync with the following locations:
@@ -1139,8 +1142,8 @@ jobs:
           echo "SKIP_SQLDOC: $SKIP_SQLDOC"
           cd docs
           bundle exec jekyll build
-      - name: Run documentation build
-        if: inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0'
+      - name: Run documentation build for branch-4.1
+        if: inputs.branch == 'branch-4.1'
         run: |
           # We need this link to make sure `python3` points to `python3.11` which contains the prerequisite packages.
           ln -s "$(which python3.11)" "/usr/local/bin/python3"
@@ -1163,6 +1166,30 @@
           echo "SKIP_SQLDOC: $SKIP_SQLDOC"
           cd docs
           bundle exec jekyll build
+      - name: Run documentation build
+        if: inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0' && inputs.branch != 'branch-4.1'
+        run: |
+          # We need this link to make sure `python3` points to `python3.12` which contains the prerequisite packages.
+          ln -s "$(which python3.12)" "/usr/local/bin/python3"
+          # Build docs first with SKIP_API to ensure they are buildable without requiring any
+          # language docs to be built beforehand.
+          cd docs; SKIP_ERRORDOC=1 SKIP_API=1 bundle exec jekyll build; cd ..
+          if [ -f "./dev/is-changed.py" ]; then
+            # Skip PySpark and SparkR docs while keeping Scala/Java/SQL docs
+            pyspark_modules=`cd dev && python3.12 -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark')))"`
+            if [ `./dev/is-changed.py -m $pyspark_modules` = false ]; then export SKIP_PYTHONDOC=1; fi
+            if [ `./dev/is-changed.py -m sparkr` = false ]; then export SKIP_RDOC=1; fi
+          fi
+          export PYSPARK_DRIVER_PYTHON=python3.12
+          export PYSPARK_PYTHON=python3.12
+          # Print the values of environment variables `SKIP_ERRORDOC`, `SKIP_SCALADOC`, `SKIP_PYTHONDOC`, `SKIP_RDOC` and `SKIP_SQLDOC`
+          echo "SKIP_ERRORDOC: $SKIP_ERRORDOC"
+          echo "SKIP_SCALADOC: $SKIP_SCALADOC"
+          echo "SKIP_PYTHONDOC: $SKIP_PYTHONDOC"
+          echo "SKIP_RDOC: $SKIP_RDOC"
+          echo "SKIP_SQLDOC: $SKIP_SQLDOC"
+          cd docs
+          bundle exec jekyll build
       - name: Tar documentation
         if: github.repository != 'apache/spark'
         run: tar cjf site.tar.bz2 docs/_site

.github/workflows/test_report.yml

Lines changed: 3 additions & 1 deletion
@@ -26,7 +26,9 @@ on:
 
 jobs:
   test_report:
-    if: "!contains(fromJson('[\"skipped\", \"cancelled\"]'), github.event.workflow_run.conclusion)"
+    if: >
+      github.event.workflow_run.path != '.github/workflows/pages.yml' &&
+      !contains(fromJson('["skipped", "cancelled"]'), github.event.workflow_run.conclusion)
     runs-on: ubuntu-latest
     permissions:
       actions: read

common/utils/src/main/scala/org/apache/spark/SparkException.scala

Lines changed: 8 additions & 8 deletions
@@ -198,7 +198,7 @@ private[spark] class SparkUpgradeException private(
     )
   }
 
-  def this(
+  private[spark] def this(
       errorClass: String,
       messageParameters: Map[String, String],
       cause: Throwable,
@@ -243,7 +243,7 @@ private[spark] class SparkArithmeticException private(
     )
   }
 
-  def this(
+  private[spark] def this(
      errorClass: String,
      messageParameters: Map[String, String],
      context: Array[QueryContext],
@@ -291,7 +291,7 @@ private[spark] class SparkUnsupportedOperationException private(
     )
   }
 
-  def this(
+  private[spark] def this(
      errorClass: String,
      messageParameters: Map[String, String],
      sqlState: Option[String]) = {
@@ -417,7 +417,7 @@ private[spark] class SparkDateTimeException private(
     )
   }
 
-  def this(
+  private[spark] def this(
      errorClass: String,
      messageParameters: Map[String, String],
      context: Array[QueryContext],
@@ -491,7 +491,7 @@ private[spark] class SparkNumberFormatException private(
     )
   }
 
-  def this(
+  private[spark] def this(
      errorClass: String,
      messageParameters: Map[String, String],
      context: Array[QueryContext],
@@ -547,7 +547,7 @@ private[spark] class SparkIllegalArgumentException private(
     )
   }
 
-  def this(
+  private[spark] def this(
      errorClass: String,
      messageParameters: Map[String, String],
      context: Array[QueryContext],
@@ -639,7 +639,7 @@ private[spark] class SparkRuntimeException private(
     )
   }
 
-  def this(
+  private[spark] def this(
      errorClass: String,
      messageParameters: Map[String, String],
      cause: Throwable,
@@ -763,7 +763,7 @@ private[spark] class SparkArrayIndexOutOfBoundsException private(
     )
   }
 
-  def this(
+  private[spark] def this(
      errorClass: String,
      messageParameters: Map[String, String],
      context: Array[QueryContext],
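The net effect of these eight changes is that the auxiliary constructors of Spark's exception classes are only callable from code inside the org.apache.spark package. A rough, hypothetical sketch of the pattern (class and member names below are made up, not from this commit):

    // Hypothetical sketch of a private[spark] auxiliary constructor; all names
    // here are illustrative and do not appear in the commit.
    package org.apache.spark.example

    class ExampleException private (message: String) extends Exception(message) {

      // Visible only inside the org.apache.spark package, mirroring the
      // tightened visibility applied to the exception classes above.
      private[spark] def this(errorClass: String, params: Map[String, String]) =
        this(s"[$errorClass] " + params.map { case (k, v) => s"$k=$v" }.mkString(", "))
    }

    object ExampleException {
      // Code outside org.apache.spark constructs instances through a factory instead.
      def apply(errorClass: String, params: Map[String, String]): ExampleException =
        new ExampleException(errorClass, params)
    }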

connector/docker-integration-tests/README.md

Lines changed: 1 addition & 0 deletions
@@ -55,6 +55,7 @@ The following environment variables can be used to specify the custom Docker ima
 - MYSQL_DOCKER_IMAGE_NAME
 - ORACLE_DOCKER_IMAGE_NAME
 - POSTGRES_DOCKER_IMAGE_NAME
+- STARROCKS_DOCKER_IMAGE_NAME
 
 ## Using a custom Docker context
 

connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala

Lines changed: 19 additions & 19 deletions
@@ -79,7 +79,7 @@ class DB2IntegrationSuite extends SharedJDBCIntegrationSuite {
   }
 
   test("Basic test") {
-    val df = sqlContext.read.jdbc(jdbcUrl, "tbl", new Properties)
+    val df = spark.read.jdbc(jdbcUrl, "tbl", new Properties)
     val rows = df.collect()
     assert(rows.length == 2)
     val types = rows(0).toSeq.map(x => x.getClass.toString)
@@ -91,7 +91,7 @@ class DB2IntegrationSuite extends SharedJDBCIntegrationSuite {
   test("Numeric types") {
     Seq(true, false).foreach { legacy =>
       withSQLConf(SQLConf.LEGACY_DB2_TIMESTAMP_MAPPING_ENABLED.key -> legacy.toString) {
-        val df = sqlContext.read.jdbc(jdbcUrl, "numbers", new Properties)
+        val df = spark.read.jdbc(jdbcUrl, "numbers", new Properties)
         val rows = df.collect()
         assert(rows.length == 1)
         val types = rows(0).toSeq.map(x => x.getClass.toString)
@@ -131,7 +131,7 @@ class DB2IntegrationSuite extends SharedJDBCIntegrationSuite {
 
   test("Date types") {
     withDefaultTimeZone(UTC) {
-      val df = sqlContext.read.jdbc(jdbcUrl, "dates", new Properties)
+      val df = spark.read.jdbc(jdbcUrl, "dates", new Properties)
       val rows = df.collect()
       assert(rows.length == 1)
       val types = rows(0).toSeq.map(x => x.getClass.toString)
@@ -146,7 +146,7 @@ class DB2IntegrationSuite extends SharedJDBCIntegrationSuite {
   }
 
   test("String types") {
-    val df = sqlContext.read.jdbc(jdbcUrl, "strings", new Properties)
+    val df = spark.read.jdbc(jdbcUrl, "strings", new Properties)
     val rows = df.collect()
     assert(rows.length == 1)
     val types = rows(0).toSeq.map(x => x.getClass.toString)
@@ -164,20 +164,20 @@ class DB2IntegrationSuite extends SharedJDBCIntegrationSuite {
 
   test("Basic write test") {
     // cast decflt column with precision value of 38 to DB2 max decimal precision value of 31.
-    val df1 = sqlContext.read.jdbc(jdbcUrl, "numbers", new Properties)
+    val df1 = spark.read.jdbc(jdbcUrl, "numbers", new Properties)
       .selectExpr("small", "med", "big", "deci", "flt", "dbl", "real",
         "cast(decflt as decimal(31, 5)) as decflt")
-    val df2 = sqlContext.read.jdbc(jdbcUrl, "dates", new Properties)
-    val df3 = sqlContext.read.jdbc(jdbcUrl, "strings", new Properties)
+    val df2 = spark.read.jdbc(jdbcUrl, "dates", new Properties)
+    val df3 = spark.read.jdbc(jdbcUrl, "strings", new Properties)
     df1.write.jdbc(jdbcUrl, "numberscopy", new Properties)
     df2.write.jdbc(jdbcUrl, "datescopy", new Properties)
     df3.write.jdbc(jdbcUrl, "stringscopy", new Properties)
     // spark types that does not have exact matching db2 table types.
-    val df4 = sqlContext.createDataFrame(
+    val df4 = spark.createDataFrame(
       sparkContext.parallelize(Seq(Row("1".toShort, "20".toByte))),
       new StructType().add("c1", ShortType).add("b", ByteType))
     df4.write.jdbc(jdbcUrl, "otherscopy", new Properties)
-    val rows = sqlContext.read.jdbc(jdbcUrl, "otherscopy", new Properties).collect()
+    val rows = spark.read.jdbc(jdbcUrl, "otherscopy", new Properties).collect()
     assert(rows(0).getShort(0) == 1)
     assert(rows(0).getShort(1) == 20)
   }
@@ -215,20 +215,20 @@ class DB2IntegrationSuite extends SharedJDBCIntegrationSuite {
     ).map { case (x, y) =>
       Row(Integer.valueOf(x), String.valueOf(y))
     }
-    val df = sqlContext.read.jdbc(jdbcUrl, "tbl", new Properties)
+    val df = spark.read.jdbc(jdbcUrl, "tbl", new Properties)
     for (_ <- 0 to 2) {
       df.write.mode(SaveMode.Append).jdbc(jdbcUrl, "tblcopy", new Properties)
     }
-    assert(sqlContext.read.jdbc(jdbcUrl, "tblcopy", new Properties).count() === 6)
+    assert(spark.read.jdbc(jdbcUrl, "tblcopy", new Properties).count() === 6)
     df.write.mode(SaveMode.Overwrite).option("truncate", true)
       .jdbc(jdbcUrl, "tblcopy", new Properties)
-    val actual = sqlContext.read.jdbc(jdbcUrl, "tblcopy", new Properties).collect()
+    val actual = spark.read.jdbc(jdbcUrl, "tblcopy", new Properties).collect()
     assert(actual.length === 2)
     assert(actual.toSet === expectedResult)
   }
 
   test("SPARK-42534: DB2 Limit pushdown test") {
-    val actual = sqlContext.read
+    val actual = spark.read
       .format("jdbc")
       .option("url", jdbcUrl)
       .option("dbtable", "tbl")
@@ -238,7 +238,7 @@ class DB2IntegrationSuite extends SharedJDBCIntegrationSuite {
       .orderBy("x")
       .collect()
 
-    val expected = sqlContext.read
+    val expected = spark.read
       .format("jdbc")
       .option("url", jdbcUrl)
       .option("query", "SELECT x, y FROM tbl ORDER BY x FETCH FIRST 2 ROWS ONLY")
@@ -249,31 +249,31 @@ class DB2IntegrationSuite extends SharedJDBCIntegrationSuite {
   }
 
   test("SPARK-48269: boolean type") {
-    val df = sqlContext.read.jdbc(jdbcUrl, "booleans", new Properties)
+    val df = spark.read.jdbc(jdbcUrl, "booleans", new Properties)
     checkAnswer(df, Row(true))
     Seq(true, false).foreach { legacy =>
       withSQLConf(SQLConf.LEGACY_DB2_BOOLEAN_MAPPING_ENABLED.key -> legacy.toString) {
         val tbl = "booleanscopy" + legacy
         df.write.jdbc(jdbcUrl, tbl, new Properties)
         if (legacy) {
-          checkAnswer(sqlContext.read.jdbc(jdbcUrl, tbl, new Properties), Row("1"))
+          checkAnswer(spark.read.jdbc(jdbcUrl, tbl, new Properties), Row("1"))
         } else {
-          checkAnswer(sqlContext.read.jdbc(jdbcUrl, tbl, new Properties), Row(true))
+          checkAnswer(spark.read.jdbc(jdbcUrl, tbl, new Properties), Row(true))
         }
       }
     }
   }
 
   test("SPARK-48269: GRAPHIC types") {
-    val df = sqlContext.read.jdbc(jdbcUrl, "graphics", new Properties)
+    val df = spark.read.jdbc(jdbcUrl, "graphics", new Properties)
     checkAnswer(df, Row("a".padTo(16, ' '), "b"))
     // the padding happens in the source not because of reading as char type
     assert(!df.schema.exists {
       _.metadata.contains(CharVarcharUtils.CHAR_VARCHAR_TYPE_STRING_METADATA_KEY) })
   }
 
   test("SPARK-48269: binary types") {
-    val df = sqlContext.read.jdbc(jdbcUrl, "binarys", new Properties)
+    val df = spark.read.jdbc(jdbcUrl, "binarys", new Properties)
     checkAnswer(df, Row(
       "ABC".padTo(10, ' ').getBytes,
       "ABC".getBytes,

connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala

Lines changed: 6 additions & 0 deletions
@@ -210,6 +210,7 @@ abstract class DockerJDBCIntegrationSuite
       assert(response.getState.getRunning)
     }
     jdbcUrl = db.getJdbcUrl(dockerIp, externalPort)
+    sleepBeforeTesting()
     var conn: Connection = null
     eventually(connectionTimeout, interval(1.second)) {
       conn = getConnection()
@@ -255,6 +256,11 @@
    */
   def dataPreparation(connection: Connection): Unit
 
+  /**
+   * Sleep for a while before testing.
+   */
+  def sleepBeforeTesting(): Unit = {}
+
   private def cleanupContainer(): Unit = {
     if (docker != null && container != null && !keepContainer) {
       try {
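The new sleepBeforeTesting() hook is a no-op by default and runs once the JDBC URL is known, before the first connection attempt. A self-contained sketch of how a suite might use it (the class names and the 30-second delay are illustrative, not from the commit):

    // Illustrative analogue of the new hook; the class names and the delay are
    // made up, only the sleepBeforeTesting() idea comes from the diff above.
    abstract class ContainerBackedSuite {
      /** Sleep for a while before testing; empty by default, as in the diff above. */
      def sleepBeforeTesting(): Unit = {}

      def connectToContainer(): Unit = {
        // ... start the container and compute the JDBC URL ...
        sleepBeforeTesting() // give slow-starting databases time to become reachable
        // ... open the first JDBC connection ...
      }
    }

    // A database whose container reports "running" before the server accepts
    // queries can override the hook with a fixed delay.
    class SlowStartingSuite extends ContainerBackedSuite {
      override def sleepBeforeTesting(): Unit = Thread.sleep(30 * 1000L)
    }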

connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerKrbJDBCIntegrationSuite.scala

Lines changed: 2 additions & 2 deletions
@@ -150,9 +150,9 @@ abstract class DockerKrbJDBCIntegrationSuite extends DockerJDBCIntegrationSuite
     props.setProperty("principal", principal)
 
     val tableName = "write_test"
-    sqlContext.createDataFrame(Seq(("foo", "bar")))
+    spark.createDataFrame(Seq(("foo", "bar")))
       .write.jdbc(jdbcUrl, tableName, props)
-    val df = sqlContext.read.jdbc(jdbcUrl, tableName, props)
+    val df = spark.read.jdbc(jdbcUrl, tableName, props)
 
     val schema = df.schema
     assert(schema.map(_.dataType).toSeq === Seq(StringType, StringType))

connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@ class MariaDBKrbIntegrationSuite extends DockerKrbJDBCIntegrationSuite {
   override val db = new MariaDBDatabaseOnDocker() {
 
     override def getJdbcUrl(ip: String, port: Int): String =
-      s"jdbc:mysql://$ip:$port/mysql?user=$principal"
+      s"jdbc:mysql://$ip:$port/mysql?user=$principal&permitMysqlScheme"
 
     override def beforeContainerStart(
         hostConfigBuilder: HostConfig,

connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLDatabaseOnDocker.scala

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@
 package org.apache.spark.sql.jdbc
 
 class MySQLDatabaseOnDocker extends DatabaseOnDocker {
-  override val imageName = sys.env.getOrElse("MYSQL_DOCKER_IMAGE_NAME", "mysql:9.2.0")
+  override val imageName = sys.env.getOrElse("MYSQL_DOCKER_IMAGE_NAME", "mysql:9.6.0")
   override val env = Map(
     "MYSQL_ROOT_PASSWORD" -> "rootpass"
   )
