3 files changed (+32 −4 lines changed):

- main/scala/org/apache/spark/sql/DataFrameWriter.scala
- test/scala/org/apache/spark/sql/sources/v2/DataSourceV2Suite.scala
- test/scala/org/apache/spark/sql/sources/v2/SimpleWritableDataSource.scala
main/scala/org/apache/spark/sql/DataFrameWriter.scala

@@ -246,8 +246,8 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
         df.sparkSession.sessionState.conf)
       val options = sessionOptions ++ extraOptions

-      val relation = DataSourceV2Relation.create(source, options)
       if (mode == SaveMode.Append) {
+        val relation = DataSourceV2Relation.create(source, options)
         runCommand(df.sparkSession, "save") {
           AppendData.byName(relation, df.logicalPlan)
         }
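Why this matters: `DataSourceV2Relation.create` resolves the source, which makes the source report its schema, so creating the relation before the mode check forced a schema read for every `SaveMode`. A hedged sketch of the resulting control flow (the non-append branch and its helper name are assumptions based on surrounding `save()` code this hunk does not show):

// Sketch only, simplified from DataFrameWriter.save: the relation, and with
// it the schema lookup on the source, is materialised only for appends.
if (mode == SaveMode.Append) {
  val relation = DataSourceV2Relation.create(source, options)  // may read the schema
  runCommand(df.sparkSession, "save") {
    AppendData.byName(relation, df.logicalPlan)
  }
} else {
  // error/overwrite/ignore: the write is planned from df.logicalPlan's own
  // schema, so the source is never asked for one (assumption; branch not shown).
  writeWithV2Provider(source, options, mode, df)  // hypothetical helper
}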
test/scala/org/apache/spark/sql/sources/v2/DataSourceV2Suite.scala

@@ -351,6 +351,24 @@ class DataSourceV2Suite extends QueryTest with SharedSQLContext {
       }
     }
   }
+
+  test("SPARK-25700: do not read schema when writing in other modes except append mode") {
+    withTempPath { file =>
+      val cls = classOf[SimpleWriteOnlyDataSource]
+      val path = file.getCanonicalPath
+      val df = spark.range(5).select('id as 'i, -'id as 'j)
+      try {
+        df.write.format(cls.getName).option("path", path).mode("error").save()
+        df.write.format(cls.getName).option("path", path).mode("overwrite").save()
+        df.write.format(cls.getName).option("path", path).mode("ignore").save()
+      } catch {
+        case e: SchemaReadAttemptException => fail("Schema read was attempted.", e)
+      }
+      intercept[SchemaReadAttemptException] {
+        df.write.format(cls.getName).option("path", path).mode("append").save()
+      }
+    }
+  }
 }


@@ -640,3 +658,14 @@ object SpecificReaderFactory extends PartitionReaderFactory {
     }
   }
 }
+
+class SchemaReadAttemptException(m: String) extends RuntimeException(m)
+
+class SimpleWriteOnlyDataSource extends SimpleWritableDataSource {
+  override def fullSchema(): StructType = {
+    // This is a bit hacky: this source implements read support but throws
+    // during schema retrieval. It may have to be rewritten later, but it is
+    // kept this way to minimise the changes.
+    throw new SchemaReadAttemptException("read is not supported")
+  }
+}
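A usage sketch of the new fixture (only the class names are from the patch; the local SparkSession setup and the scratch path are assumptions for illustration): after this change, only append mode should reach `fullSchema()`.

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[2]").appName("demo").getOrCreate()
import spark.implicits._

val df = spark.range(5).select('id as 'i, -'id as 'j)
val source = classOf[SimpleWriteOnlyDataSource].getName

// Fine after this patch: non-append modes never ask the source for a schema.
df.write.format(source).option("path", "/tmp/write-only").mode("overwrite").save()

// Still throws SchemaReadAttemptException: append builds a
// DataSourceV2Relation, which resolves the source's schema.
df.write.format(source).option("path", "/tmp/write-only").mode("append").save()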
test/scala/org/apache/spark/sql/sources/v2/SimpleWritableDataSource.scala

@@ -43,13 +43,13 @@ class SimpleWritableDataSource extends DataSourceV2
   with BatchWriteSupportProvider
   with SessionConfigSupport {

-  private val schema = new StructType().add("i", "long").add("j", "long")
+  protected def fullSchema(): StructType = new StructType().add("i", "long").add("j", "long")

   override def keyPrefix: String = "simpleWritableDataSource"

   class ReadSupport(path: String, conf: Configuration) extends SimpleReadSupport {

-    override def fullSchema(): StructType = schema
+    override def fullSchema(): StructType = SimpleWritableDataSource.this.fullSchema()

     override def planInputPartitions(config: ScanConfig): Array[InputPartition] = {
       val dataPath = new Path(path)

@@ -116,7 +116,6 @@ class SimpleWritableDataSource extends DataSourceV2
       schema: StructType,
       mode: SaveMode,
       options: DataSourceOptions): Optional[BatchWriteSupport] = {
-    assert(DataType.equalsStructurally(schema.asNullable, this.schema.asNullable))
     assert(!SparkContext.getActive.get.conf.getBoolean("spark.speculation", false))

     val path = new Path(options.get("path").get())
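A note on the last hunk: the dropped assertion compared the incoming schema with the source's own (the old `schema` val, now `fullSchema()`). Keeping it would itself trigger a schema read inside the write path, and would therefore throw for `SimpleWriteOnlyDataSource`, so it is removed rather than relocated.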