@@ -217,22 +217,14 @@ abstract class CometNativeExec extends CometExec {
     // TODO: support native metrics for all operators.
     val nativeMetrics = CometMetricNode.fromCometPlan(this)

+    // Go over all the native scans, in order to see if they need encryption options.
     // For each relation in a CometNativeScan generate a hadoopConf,
     // for each file path in a relation associate with hadoopConf
-    val cometNativeScans: Seq[CometNativeScanExec] = this
-      .collectLeaves()
-      .filter(_.isInstanceOf[CometNativeScanExec])
-      .map(_.asInstanceOf[CometNativeScanExec])
-    assert(
-      cometNativeScans.size <= 1,
-      "We expect one native scan in a Comet plan since we will broadcast one hadoopConf.")
-    // If this assumption changes in the future, you can look at the commit history of #2447
-    // to see how there used to be a map of relations to broadcasted confs in case multiple
-    // relations in a single plan. The example that came up was UNION. See discussion at:
-    // https://github.com/apache/datafusion-comet/pull/2447#discussion_r2406118264
-    val (broadcastedHadoopConfForEncryption, encryptedFilePaths) =
-      cometNativeScans.headOption.fold(
-        (None: Option[Broadcast[SerializableConfiguration]], Seq.empty[String])) { scan =>
+    // This is done per native plan, so only count scans until a Comet input is reached.
+    val encryptionOptions =
+      mutable.ArrayBuffer.empty[(Broadcast[SerializableConfiguration], Seq[String])]
+    foreachUntilCometInput(this) {
+      case scan: CometNativeScanExec =>
         // This creates a hadoopConf that brings in any SQLConf "spark.hadoop.*" configs and
         // per-relation configs since different tables might have different decryption
         // properties.
@@ -244,10 +236,25 @@ abstract class CometNativeExec extends CometExec {
           val broadcastedConf =
             scan.relation.sparkSession.sparkContext
               .broadcast(new SerializableConfiguration(hadoopConf))
-          (Some(broadcastedConf), scan.relation.inputFiles.toSeq)
-        } else {
-          (None, Seq.empty)
+
+          val optsTuple: (Broadcast[SerializableConfiguration], Seq[String]) =
+            (broadcastedConf, scan.relation.inputFiles.toSeq)
+          encryptionOptions += optsTuple
         }
+      case _ => // no-op
+    }
+    assert(
+      encryptionOptions.size <= 1,
+      "We expect at most one native scan that requires encrypted reads in a Comet plan," +
+        " since we will broadcast one hadoopConf.")
+    // If this assumption changes in the future, you can look at the commit history of #2447
+    // to see how there used to be a map of relations to broadcasted confs in case multiple
+    // relations in a single plan. The example that came up was UNION. See discussion at:
+    // https://github.com/apache/datafusion-comet/pull/2447#discussion_r2406118264
+    val (broadcastedHadoopConfForEncryption, encryptedFilePaths) =
+      encryptionOptions.headOption match {
+        case Some((conf, paths)) => (Some(conf), paths)
+        case None => (None, Seq.empty)
       }

     def createCometExecIter(
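For context, the collection above relies on foreachUntilCometInput, which is not shown in this diff. Below is a minimal, self-contained sketch of the general pattern it suggests, assuming a pre-order traversal that stops descending once a Comet input boundary is reached; the object name, foreachUntilCometInputSketch, and the isCometInput predicate are illustrative stand-ins, not Comet's actual API.

import org.apache.spark.sql.execution.SparkPlan

// Sketch only: visit the operators that belong to the current native plan top-down,
// applying a handler to each one, and stop at nodes that feed this plan its input.
object TraversalSketch {
  def foreachUntilCometInputSketch(plan: SparkPlan, isCometInput: SparkPlan => Boolean)(
      func: PartialFunction[SparkPlan, Unit]): Unit = {
    // Apply the handler to the current operator if it matches.
    if (func.isDefinedAt(plan)) {
      func(plan)
    }
    // Do not descend past an input boundary, so scans that belong to a different
    // native plan (for example, below a shuffle) are not collected twice.
    if (!isCometInput(plan)) {
      plan.children.foreach(child => foreachUntilCometInputSketch(child, isCometInput)(func))
    }
  }
}

The call site in the diff passes a partial-function literal with a trailing "case _ =>" branch, so non-scan operators are simply skipped while the traversal continues into their children.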