@@ -49,9 +49,9 @@ case class OptimizeMetadataOnlyQuery(catalog: SessionCatalog) extends Rule[Logic
49
49
}
50
50
51
51
plan.transform {
52
- case a @ Aggregate (_, aggExprs, child @ PartitionedRelation (partAttrs, relation )) =>
52
+ case a @ Aggregate (_, aggExprs, child @ PartitionedRelation (_, attrs, filters, rel )) =>
53
53
// We only apply this optimization when only partitioned attributes are scanned.
54
- if (a.references.subsetOf(partAttrs )) {
54
+ if (a.references.subsetOf(attrs )) {
55
55
val aggFunctions = aggExprs.flatMap(_.collect {
56
56
case agg : AggregateExpression => agg
57
57
})
@@ -67,7 +67,7 @@ case class OptimizeMetadataOnlyQuery(catalog: SessionCatalog) extends Rule[Logic
67
67
})
68
68
}
69
69
if (isAllDistinctAgg) {
70
- a.withNewChildren(Seq (replaceTableScanWithPartitionMetadata(child, relation )))
70
+ a.withNewChildren(Seq (replaceTableScanWithPartitionMetadata(child, rel, filters )))
71
71
} else {
72
72
a
73
73
}
@@ -98,27 +98,49 @@ case class OptimizeMetadataOnlyQuery(catalog: SessionCatalog) extends Rule[Logic
98
98
*/
99
99
private def replaceTableScanWithPartitionMetadata (
100
100
child : LogicalPlan ,
101
- relation : LogicalPlan ): LogicalPlan = {
101
+ relation : LogicalPlan ,
102
+ partFilters : Seq [Expression ]): LogicalPlan = {
103
+ // This logic comes from PruneFileSourcePartitions. It ensures that the filter names match the
104
+ // relation's schema. PartitionedRelation ensures that the filters only reference partition cols
105
+ val relFilters = partFilters.map { e =>
106
+ e transform {
107
+ case a : AttributeReference =>
108
+ a.withName(relation.output.find(_.semanticEquals(a)).get.name)
109
+ }
110
+ }
111
+
102
112
child transform {
103
113
case plan if plan eq relation =>
104
114
relation match {
105
115
case l @ LogicalRelation (fsRelation : HadoopFsRelation , _, _, isStreaming) =>
106
116
val partAttrs = getPartitionAttrs(fsRelation.partitionSchema.map(_.name), l)
107
- val partitionData = fsRelation.location.listFiles(Nil , Nil )
108
- LocalRelation (partAttrs, partitionData.map(_.values), isStreaming)
117
+ val partitionData = fsRelation.location.listFiles(relFilters, Nil )
118
+ // Partition data may be a stream, which can cause serialization to hit stack level too
119
+ // deep exceptions because it is a recursive structure in memory. Converting to array
120
+ // avoids the problem.
121
+ LocalRelation (partAttrs, partitionData.map(_.values).toArray, isStreaming)
109
122
110
123
case relation : HiveTableRelation =>
111
124
val partAttrs = getPartitionAttrs(relation.tableMeta.partitionColumnNames, relation)
112
125
val caseInsensitiveProperties =
113
126
CaseInsensitiveMap (relation.tableMeta.storage.properties)
114
127
val timeZoneId = caseInsensitiveProperties.get(DateTimeUtils .TIMEZONE_OPTION )
115
128
.getOrElse(SQLConf .get.sessionLocalTimeZone)
116
- val partitionData = catalog.listPartitions(relation.tableMeta.identifier).map { p =>
129
+ val partitions = if (partFilters.nonEmpty) {
130
+ catalog.listPartitionsByFilter(relation.tableMeta.identifier, relFilters)
131
+ } else {
132
+ catalog.listPartitions(relation.tableMeta.identifier)
133
+ }
134
+
135
+ val partitionData = partitions.map { p =>
117
136
InternalRow .fromSeq(partAttrs.map { attr =>
118
137
Cast (Literal (p.spec(attr.name)), attr.dataType, Option (timeZoneId)).eval()
119
138
})
120
139
}
121
- LocalRelation (partAttrs, partitionData)
140
+ // Partition data may be a stream, which can cause serialization to hit stack level too
141
+ // deep exceptions because it is a recursive structure in memory. Converting to array
142
+ // avoids the problem.
143
+ LocalRelation (partAttrs, partitionData.toArray)
122
144
123
145
case _ =>
124
146
throw new IllegalStateException (s " unrecognized table scan node: $relation, " +
@@ -129,35 +151,47 @@ case class OptimizeMetadataOnlyQuery(catalog: SessionCatalog) extends Rule[Logic
129
151
130
152
/**
131
153
* A pattern that finds the partitioned table relation node inside the given plan, and returns a
132
- * pair of the partition attributes and the table relation node.
154
+ * tuple of the partition attributes, projected attributes, partition filters, and relation node.
133
155
*
134
156
* It keeps traversing down the given plan tree if there is a [[Project ]] or [[Filter ]] with
135
157
* deterministic expressions, and returns result after reaching the partitioned table relation
136
158
* node.
137
159
*/
138
- object PartitionedRelation {
139
-
140
- def unapply (plan : LogicalPlan ): Option [(AttributeSet , LogicalPlan )] = plan match {
141
- case l @ LogicalRelation (fsRelation : HadoopFsRelation , _, _, _)
142
- if fsRelation.partitionSchema.nonEmpty =>
143
- val partAttrs = getPartitionAttrs(fsRelation.partitionSchema.map(_.name), l)
144
- Some ((AttributeSet (partAttrs), l))
145
-
146
- case relation : HiveTableRelation if relation.tableMeta.partitionColumnNames.nonEmpty =>
147
- val partAttrs = getPartitionAttrs(relation.tableMeta.partitionColumnNames, relation)
148
- Some ((AttributeSet (partAttrs), relation))
149
-
150
- case p @ Project (projectList, child) if projectList.forall(_.deterministic) =>
151
- unapply(child).flatMap { case (partAttrs, relation) =>
152
- if (p.references.subsetOf(partAttrs)) Some ((p.outputSet, relation)) else None
153
- }
160
+ object PartitionedRelation extends PredicateHelper {
161
+
162
+ def unapply (
163
+ plan : LogicalPlan ): Option [(AttributeSet , AttributeSet , Seq [Expression ], LogicalPlan )] = {
164
+ plan match {
165
+ case l @ LogicalRelation (fsRelation : HadoopFsRelation , _, _, _)
166
+ if fsRelation.partitionSchema.nonEmpty =>
167
+ val partAttrs = AttributeSet (getPartitionAttrs(fsRelation.partitionSchema.map(_.name), l))
168
+ Some ((partAttrs, partAttrs, Nil , l))
169
+
170
+ case relation : HiveTableRelation if relation.tableMeta.partitionColumnNames.nonEmpty =>
171
+ val partAttrs = AttributeSet (
172
+ getPartitionAttrs(relation.tableMeta.partitionColumnNames, relation))
173
+ Some ((partAttrs, partAttrs, Nil , relation))
174
+
175
+ case p @ Project (projectList, child) if projectList.forall(_.deterministic) =>
176
+ unapply(child).flatMap { case (partAttrs, attrs, filters, relation) =>
177
+ if (p.references.subsetOf(attrs)) {
178
+ Some ((partAttrs, p.outputSet, filters, relation))
179
+ } else {
180
+ None
181
+ }
182
+ }
154
183
155
- case f @ Filter (condition, child) if condition.deterministic =>
156
- unapply(child).flatMap { case (partAttrs, relation) =>
157
- if (f.references.subsetOf(partAttrs)) Some ((partAttrs, relation)) else None
158
- }
184
+ case f @ Filter (condition, child) if condition.deterministic =>
185
+ unapply(child).flatMap { case (partAttrs, attrs, filters, relation) =>
186
+ if (f.references.subsetOf(partAttrs)) {
187
+ Some ((partAttrs, attrs, splitConjunctivePredicates(condition) ++ filters, relation))
188
+ } else {
189
+ None
190
+ }
191
+ }
159
192
160
- case _ => None
193
+ case _ => None
194
+ }
161
195
}
162
196
}
163
197
}
0 commit comments