Skip to content

Commit 59977a8

Browse files
committed
[SPARK-54720][SQL] Add SparkSession.emptyDataFrame with a schema
### What changes were proposed in this pull request?
This PR adds a version of `SparkSession.emptyDataFrame` that takes a schema.

### Why are the changes needed?
It makes it easier to create an empty DataFrame in Scala.

### Does this PR introduce _any_ user-facing change?
Yes, it adds a new API.

### How was this patch tested?
I have added a test case.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes #53489 from hvanhovell/SPARK-54720.

Authored-by: Herman van Hövell <[email protected]>
Signed-off-by: Herman van Hövell <[email protected]>
1 parent 1aa4b15 commit 59977a8

File tree

2 files changed

+21
-1
lines changed

2 files changed

+21
-1
lines changed

sql/api/src/main/scala/org/apache/spark/sql/SparkSession.scala

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,9 +210,15 @@ abstract class SparkSession extends Serializable with Closeable {
210210
*
211211
* @since 2.0.0
212212
*/
213-
@transient
214213
def emptyDataFrame: DataFrame
215214

215+
/**
216+
* Returns a `DataFrame` with schema `schema` and no rows.
217+
*
218+
* @since 4.2.0
219+
*/
220+
def emptyDataFrame(schema: StructType): DataFrame = {
  // Derive a row encoder from the requested schema and hand it to `emptyDataset`
  // explicitly, so the resulting empty Dataset is typed by that encoder.
  val rowEncoder = Encoders.row(schema)
  emptyDataset(rowEncoder)
}
221+
216222
/**
217223
* Creates a `DataFrame` from a local Seq of Product.
218224
*

sql/api/src/test/scala/org/apache/spark/sql/SparkSessionBuilderImplementationBindingSuite.scala

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import org.scalatest.funsuite.AnyFunSuite
2222

2323
import org.apache.spark.SparkContext
2424
import org.apache.spark.sql.functions.{max, sum}
25+
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
2526

2627
/**
2728
* Test suite for SparkSession implementation binding.
@@ -70,4 +71,17 @@ trait SparkSessionBuilderImplementationBindingSuite
7071
val df = ctx.createDataset(1 to 11).select(max("value").as[Long])
7172
assert(df.head() == 11)
7273
}
74+
75+
// Checks that `emptyDataFrame(schema)` returns a DataFrame that reports exactly the
// requested schema and has no rows, and that a projection of it behaves the same way.
test("emptyDataFrame with Schema") {
76+
val session = SparkSession.builder().getOrCreate()
77+
// Two-column schema: an integer column "a" and a string column "b".
val schema =
78+
new StructType(Array(StructField("a", IntegerType), StructField("b", StringType)))
79+
val df = session.emptyDataFrame(schema)
80+
// The empty DataFrame must carry the schema it was created with and contain no rows.
assert(df.schema == schema)
81+
assert(df.isEmpty)
82+
// Expected schema after keeping only column "a".
val derivedSchema = new StructType(Array(StructField("a", IntegerType)))
83+
val derivedDf = df.select("a")
84+
// Selecting a single column must keep that column's field and still yield no rows.
assert(derivedDf.schema == derivedSchema)
85+
assert(derivedDf.isEmpty)
86+
}
7387
}

0 commit comments

Comments
 (0)