Skip to content

Commit 308996b

Browse files
gengliangwang authored and cloud-fan committed
[SPARK-26716][SPARK-26765][FOLLOWUP][SQL] Clean up schema validation methods and override toString method in Avro
## What changes were proposed in this pull request? In apache#23639, the API `supportDataType` was refactored. We should also remove the methods `verifyWriteSchema` and `verifyReadSchema` in `DataSourceUtils`. Since the error message uses `FileFormat.toString` to specify the data source name, this PR also overrides the `toString` method in `AvroFileFormat`. ## How was this patch tested? Unit test. Closes apache#23699 from gengliangwang/SPARK-26716-followup. Authored-by: Gengliang Wang <[email protected]> Signed-off-by: Wenchen Fan <[email protected]>
1 parent d4d6df2 commit 308996b

File tree

5 files changed

+6
-19
lines changed

5 files changed

+6
-19
lines changed

external/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ private[avro] class AvroFileFormat extends FileFormat
124124

125125
override def shortName(): String = "avro"
126126

127+
override def toString(): String = "Avro"
128+
127129
override def isSplitable(
128130
sparkSession: SparkSession,
129131
options: Map[String, String],

external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -896,7 +896,7 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
896896
sql("select testType()").write.format("avro").mode("overwrite").save(tempDir)
897897
}.getMessage
898898
assert(msg.toLowerCase(Locale.ROOT)
899-
.contains(s"data source does not support calendarinterval data type."))
899+
.contains(s"avro data source does not support calendarinterval data type."))
900900
}
901901
}
902902

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,7 @@ case class DataSource(
412412
hs.partitionSchema.map(_.name),
413413
"in the partition schema",
414414
equality)
415-
DataSourceUtils.verifyReadSchema(hs.fileFormat, hs.dataSchema)
415+
DataSourceUtils.verifySchema(hs.fileFormat, hs.dataSchema)
416416
case _ =>
417417
SchemaUtils.checkColumnNameDuplication(
418418
relation.schema.map(_.name),

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,26 +24,11 @@ import org.apache.spark.sql.types._
2424

2525

2626
object DataSourceUtils {
27-
28-
/**
29-
* Verify if the schema is supported in datasource in write path.
30-
*/
31-
def verifyWriteSchema(format: FileFormat, schema: StructType): Unit = {
32-
verifySchema(format, schema, isReadPath = false)
33-
}
34-
35-
/**
36-
* Verify if the schema is supported in datasource in read path.
37-
*/
38-
def verifyReadSchema(format: FileFormat, schema: StructType): Unit = {
39-
verifySchema(format, schema, isReadPath = true)
40-
}
41-
4227
/**
4328
* Verify if the schema is supported in datasource. This verification should be done
4429
* in a driver side.
4530
*/
46-
private def verifySchema(format: FileFormat, schema: StructType, isReadPath: Boolean): Unit = {
31+
def verifySchema(format: FileFormat, schema: StructType): Unit = {
4732
schema.foreach { field =>
4833
if (!format.supportDataType(field.dataType)) {
4934
throw new AnalysisException(

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ object FileFormatWriter extends Logging {
9898
val caseInsensitiveOptions = CaseInsensitiveMap(options)
9999

100100
val dataSchema = dataColumns.toStructType
101-
DataSourceUtils.verifyWriteSchema(fileFormat, dataSchema)
101+
DataSourceUtils.verifySchema(fileFormat, dataSchema)
102102
// Note: prepareWrite has side effect. It sets "job".
103103
val outputWriterFactory =
104104
fileFormat.prepareWrite(sparkSession, job, caseInsensitiveOptions, dataSchema)

0 commit comments

Comments (0)