@@ -37,10 +37,11 @@ import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat =>
 import org.apache.spark.sql.execution.metric.SQLMetrics
 import org.apache.spark.sql.sources.{BaseRelation, Filter}
 import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.vectorized.ColumnarBatch
 import org.apache.spark.util.Utils
 import org.apache.spark.util.collection.BitSet

-trait DataSourceScanExec extends LeafExecNode with CodegenSupport {
+trait DataSourceScanExec extends LeafExecNode {
   val relation: BaseRelation
   val tableIdentifier: Option[TableIdentifier]

@@ -69,6 +70,12 @@ trait DataSourceScanExec extends LeafExecNode with CodegenSupport {
   private def redact(text: String): String = {
     Utils.redact(sqlContext.sessionState.conf.stringRedactionPattern, text)
   }
+
+  /**
+   * The data being read in. This is to provide input to the tests in a way compatible with
+   * [[InputRDDCodegen]] which all implementations used to extend.
+   */
+  def inputRDDs(): Seq[RDD[InternalRow]]
 }

 /** Physical plan node for scanning data from a relation. */
@@ -141,11 +148,11 @@ case class FileSourceScanExec(
     optionalBucketSet: Option[BitSet],
     dataFilters: Seq[Expression],
     override val tableIdentifier: Option[TableIdentifier])
-  extends DataSourceScanExec with ColumnarBatchScan {
+  extends DataSourceScanExec {

   // Note that some vals referring the file-based relation are lazy intentionally
   // so that this plan can be canonicalized on executor side too. See SPARK-23731.
-  override lazy val supportsBatch: Boolean = {
+  override lazy val supportsColumnar: Boolean = {
     relation.fileFormat.supportBatch(relation.sparkSession, schema)
   }

@@ -275,7 +282,7 @@ case class FileSourceScanExec(
     Map(
       "Format" -> relation.fileFormat.toString,
       "ReadSchema" -> requiredSchema.catalogString,
-      "Batched" -> supportsBatch.toString,
+      "Batched" -> supportsColumnar.toString,
       "PartitionFilters" -> seqToString(partitionFilters),
       "PushedFilters" -> seqToString(pushedDownFilters),
       "DataFilters" -> seqToString(dataFilters),
@@ -302,7 +309,7 @@ case class FileSourceScanExec(
       withSelectedBucketsCount
     }

-  private lazy val inputRDD: RDD[InternalRow] = {
+  lazy val inputRDD: RDD[InternalRow] = {
     val readFile: (PartitionedFile) => Iterator[InternalRow] =
       relation.fileFormat.buildReaderWithPartitionValues(
         sparkSession = relation.sparkSession,
@@ -334,29 +341,30 @@ case class FileSourceScanExec(
     "scanTime" -> SQLMetrics.createTimingMetric(sparkContext, "scan time"))

   protected override def doExecute(): RDD[InternalRow] = {
-    if (supportsBatch) {
-      // in the case of fallback, this batched scan should never fail because of:
-      // 1) only primitive types are supported
-      // 2) the number of columns should be smaller than spark.sql.codegen.maxFields
-      WholeStageCodegenExec(this)(codegenStageId = 0).execute()
-    } else {
-      val numOutputRows = longMetric("numOutputRows")
-
-      if (needsUnsafeRowConversion) {
-        inputRDD.mapPartitionsWithIndexInternal { (index, iter) =>
-          val proj = UnsafeProjection.create(schema)
-          proj.initialize(index)
-          iter.map( r => {
-            numOutputRows += 1
-            proj(r)
-          })
-        }
-      } else {
-        inputRDD.map { r =>
+    val numOutputRows = longMetric("numOutputRows")
+
+    if (needsUnsafeRowConversion) {
+      inputRDD.mapPartitionsWithIndexInternal { (index, iter) =>
+        val proj = UnsafeProjection.create(schema)
+        proj.initialize(index)
+        iter.map( r => {
           numOutputRows += 1
-          r
-        }
+          proj(r)
+        })
       }
+    } else {
+      inputRDD.map { r =>
+        numOutputRows += 1
+        r
+      }
+    }
+  }
+
+  protected override def doExecuteColumnar(): RDD[ColumnarBatch] = {
+    val numOutputRows = longMetric("numOutputRows")
+    inputRDD.asInstanceOf[RDD[ColumnarBatch]].map { batch =>
+      numOutputRows += batch.numRows()
+      batch
     }
   }
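For readers skimming the diff: the net effect is that `FileSourceScanExec` no longer wraps itself in `WholeStageCodegenExec` to produce batched output; it now reports `supportsColumnar` and exposes `doExecuteColumnar()`, letting the engine choose the execution path. The following self-contained Scala sketch illustrates that dispatch pattern only; `PlanNode`, `Batch`, `ToyScan`, and `execute` are simplified stand-ins for illustration, not Spark's actual SparkPlan API.

```scala
// Minimal sketch of the supportsColumnar / doExecuteColumnar dispatch pattern this
// change moves FileSourceScanExec onto. PlanNode, Batch, ToyScan and execute() are
// simplified stand-ins, not Spark's real classes.
object ColumnarDispatchSketch {
  type Row = Seq[Any]
  final case class Batch(rows: Seq[Row]) { def numRows: Int = rows.size }

  trait PlanNode {
    // Nodes advertise whether they can produce columnar output.
    def supportsColumnar: Boolean = false
    // Row-based path, always available.
    def doExecute(): Iterator[Row]
    // Columnar path; only meaningful when supportsColumnar is true.
    def doExecuteColumnar(): Iterator[Batch] =
      throw new UnsupportedOperationException("no columnar path")
  }

  // The "engine" picks the path from supportsColumnar, instead of the node
  // wrapping itself in a codegen stage as the removed doExecute branch did.
  def execute(plan: PlanNode): Iterator[Row] =
    if (plan.supportsColumnar) plan.doExecuteColumnar().flatMap(_.rows)
    else plan.doExecute()

  // A toy scan supporting both paths and counting rows, like the numOutputRows metric.
  final class ToyScan(data: Seq[Row]) extends PlanNode {
    var numOutputRows = 0L
    override val supportsColumnar: Boolean = true
    def doExecute(): Iterator[Row] =
      data.iterator.map { r => numOutputRows += 1; r }
    override def doExecuteColumnar(): Iterator[Batch] =
      Iterator(Batch(data)).map { b => numOutputRows += b.numRows; b }
  }

  def main(args: Array[String]): Unit = {
    val scan = new ToyScan(Seq(Seq(1, "a"), Seq(2, "b")))
    execute(scan).foreach(println)            // takes the columnar path
    println(s"numOutputRows = ${scan.numOutputRows}")
  }
}
```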