@@ -33,21 +33,23 @@ import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partition
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat => ParquetSource}
 import org.apache.spark.sql.execution.metric.SQLMetrics
-import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.sources.BaseRelation
+import org.apache.spark.sql.sources.{BaseRelation, Filter}
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.util.Utils

 trait DataSourceScanExec extends LeafExecNode with CodegenSupport {
   val relation: BaseRelation
-  val metastoreTableIdentifier: Option[TableIdentifier]
+  val tableIdentifier: Option[TableIdentifier]

   protected val nodeNamePrefix: String = ""

   override val nodeName: String = {
-    s"Scan $relation ${metastoreTableIdentifier.map(_.unquotedString).getOrElse("")}"
+    s"Scan $relation ${tableIdentifier.map(_.unquotedString).getOrElse("")}"
   }

+  // Metadata that describes more details of this scan.
+  protected def metadata: Map[String, String]
+
   override def simpleString: String = {
     val metadataEntries = metadata.toSeq.sorted.map {
       case (key, value) =>
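Note: the hunk above declares a `metadata` contract directly on the `DataSourceScanExec` trait, so every concrete scan node describes itself and `simpleString` renders those entries. As a rough illustration only (a standalone sketch, not the Spark trait; the real `simpleString` also truncates long values), the key/value rendering amounts to something like:

// Standalone sketch of metadata rendering, assuming a "key: value" layout.
object MetadataRenderingSketch {
  def render(nodeName: String, metadata: Map[String, String]): String = {
    val entries = metadata.toSeq.sorted.map { case (key, value) => s"$key: $value" }
    s"$nodeName ${entries.mkString(", ")}"
  }

  def main(args: Array[String]): Unit = {
    // Prints e.g. "Scan ExampleRelation PushedFilters: [*IsNotNull(id)], ReadSchema: struct<id:int>"
    println(render("Scan ExampleRelation",
      Map("ReadSchema" -> "struct<id:int>", "PushedFilters" -> "[*IsNotNull(id)]")))
  }
}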
@@ -73,34 +75,25 @@ trait DataSourceScanExec extends LeafExecNode with CodegenSupport {

 /** Physical plan node for scanning data from a relation. */
 case class RowDataSourceScanExec(
-    output: Seq[Attribute],
+    fullOutput: Seq[Attribute],
+    requiredColumnsIndex: Seq[Int],
+    filters: Set[Filter],
+    handledFilters: Set[Filter],
     rdd: RDD[InternalRow],
     @transient relation: BaseRelation,
-    override val outputPartitioning: Partitioning,
-    override val metadata: Map[String, String],
-    override val metastoreTableIdentifier: Option[TableIdentifier])
+    override val tableIdentifier: Option[TableIdentifier])
   extends DataSourceScanExec {

+  def output: Seq[Attribute] = requiredColumnsIndex.map(fullOutput)
+
   override lazy val metrics =
     Map("numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))

-  val outputUnsafeRows = relation match {
-    case r: HadoopFsRelation if r.fileFormat.isInstanceOf[ParquetSource] =>
-      !SparkSession.getActiveSession.get.sessionState.conf.getConf(
-        SQLConf.PARQUET_VECTORIZED_READER_ENABLED)
-    case _: HadoopFsRelation => true
-    case _ => false
-  }
-
   protected override def doExecute(): RDD[InternalRow] = {
-    val unsafeRow = if (outputUnsafeRows) {
-      rdd
-    } else {
-      rdd.mapPartitionsWithIndexInternal { (index, iter) =>
-        val proj = UnsafeProjection.create(schema)
-        proj.initialize(index)
-        iter.map(proj)
-      }
+    val unsafeRow = rdd.mapPartitionsWithIndexInternal { (index, iter) =>
+      val proj = UnsafeProjection.create(schema)
+      proj.initialize(index)
+      iter.map(proj)
     }

     val numOutputRows = longMetric("numOutputRows")
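Note: `RowDataSourceScanExec` no longer receives its pruned `output` directly; it keeps the relation's `fullOutput` plus the indexes of the required columns and derives `output` from them. A Scala `Seq` can be applied as a function from index to element, which is what `requiredColumnsIndex.map(fullOutput)` relies on. A minimal standalone sketch, with plain strings standing in for `Attribute`s:

// Standalone sketch: deriving the pruned output from the full output and the
// required column indexes, the same shape as requiredColumnsIndex.map(fullOutput).
object OutputDerivationSketch {
  def main(args: Array[String]): Unit = {
    val fullOutput = Seq("id", "name", "age", "city")  // stand-in for Seq[Attribute]
    val requiredColumnsIndex = Seq(0, 2)               // columns the scan actually needs
    val output = requiredColumnsIndex.map(fullOutput)  // Seq("id", "age")
    println(output)
  }
}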
@@ -126,24 +119,31 @@ case class RowDataSourceScanExec(
     ctx.INPUT_ROW = row
     ctx.currentVars = null
     val columnsRowInput = exprRows.map(_.genCode(ctx))
-    val inputRow = if (outputUnsafeRows) row else null
     s"""
        |while ($input.hasNext()) {
        |  InternalRow $row = (InternalRow) $input.next();
        |  $numOutputRows.add(1);
-       |  ${consume(ctx, columnsRowInput, inputRow).trim}
+       |  ${consume(ctx, columnsRowInput, null).trim}
        |  if (shouldStop()) return;
        |}
      """.stripMargin
   }

-  // Only care about `relation` and `metadata` when canonicalizing.
+  override val metadata: Map[String, String] = {
+    val markedFilters = for (filter <- filters) yield {
+      if (handledFilters.contains(filter)) s"*$filter" else s"$filter"
+    }
+    Map(
+      "ReadSchema" -> output.toStructType.catalogString,
+      "PushedFilters" -> markedFilters.mkString("[", ", ", "]"))
+  }
+
+  // Don't care about `rdd` and `tableIdentifier` when canonicalizing.
   override lazy val canonicalized: SparkPlan =
     copy(
-      output.map(QueryPlan.normalizeExprId(_, output)),
+      fullOutput.map(QueryPlan.normalizeExprId(_, fullOutput)),
       rdd = null,
-      outputPartitioning = null,
-      metastoreTableIdentifier = None)
+      tableIdentifier = None)
 }

 /**
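Note: the new `metadata` override replaces the map callers previously passed through the constructor: it reports the read schema plus the pushed filters, prefixing filters the data source fully handles with `*`. A standalone sketch of just the marking logic (strings stand in for `org.apache.spark.sql.sources.Filter`):

// Standalone sketch of the PushedFilters marking: handled filters get a '*' prefix.
object PushedFiltersSketch {
  def main(args: Array[String]): Unit = {
    val filters = Set("IsNotNull(id)", "GreaterThan(id,1)")  // stand-in for Set[Filter]
    val handledFilters = Set("IsNotNull(id)")                // subset handled by the source
    val markedFilters = for (filter <- filters) yield {
      if (handledFilters.contains(filter)) s"*$filter" else filter
    }
    println("PushedFilters: " + markedFilters.mkString("[", ", ", "]"))
  }
}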
@@ -154,15 +154,15 @@ case class RowDataSourceScanExec(
  * @param requiredSchema Required schema of the underlying relation, excluding partition columns.
  * @param partitionFilters Predicates to use for partition pruning.
  * @param dataFilters Filters on non-partition columns.
- * @param metastoreTableIdentifier identifier for the table in the metastore.
+ * @param tableIdentifier identifier for the table in the metastore.
  */
 case class FileSourceScanExec(
     @transient relation: HadoopFsRelation,
     output: Seq[Attribute],
     requiredSchema: StructType,
     partitionFilters: Seq[Expression],
     dataFilters: Seq[Expression],
-    override val metastoreTableIdentifier: Option[TableIdentifier])
+    override val tableIdentifier: Option[TableIdentifier])
   extends DataSourceScanExec with ColumnarBatchScan {

   val supportsBatch: Boolean = relation.fileFormat.supportBatch(
@@ -261,7 +261,6 @@ case class FileSourceScanExec(
   private val pushedDownFilters = dataFilters.flatMap(DataSourceStrategy.translateFilter)
   logInfo(s"Pushed Filters: ${pushedDownFilters.mkString(",")}")

-  // These metadata values make scan plans uniquely identifiable for equality checking.
   override val metadata: Map[String, String] = {
     def seqToString(seq: Seq[Any]) = seq.mkString("[", ", ", "]")
     val location = relation.location