Support Hadoop SequenceFiles Scan #14061
Open
thirtiseven wants to merge 61 commits into NVIDIA:main from thirtiseven:seq_file_reader
+2,910 −125
Changes from 7 commits

61 commits
2ab5557
AI draft for protocol buffer support
thirtiseven d261358
AI draft for Hadoop sequence file reader
thirtiseven cd31fad
Revert "AI draft for protocol buffer support"
thirtiseven 1278c66
clean up
thirtiseven 10933b0
update tests
thirtiseven e965b01
address comment
thirtiseven f89f8c1
address comment
thirtiseven 02c0752
address comment
thirtiseven f3bcf9d
address comment
thirtiseven 562672a
copyrights
thirtiseven 2e10fbd
refactor
thirtiseven 572c0da
address comments
thirtiseven 9b1162e
address comments
thirtiseven f95910f
address comments
thirtiseven 481bfbe
multi-thread reader
thirtiseven bd526c5
delete perf test
thirtiseven cf33cf4
address commens
thirtiseven af43f3e
address comments
thirtiseven 288152a
remove COALESCING reader
thirtiseven ea91eab
fix
thirtiseven 6a23c2e
fix
thirtiseven 9847f16
make sequence file isSplitable to false due to data diff
thirtiseven c6b98fb
Merge branch 'seq_file_reader' of https://github.com/thirtiseven/spar…
thirtiseven 70ad202
fix merge seqreader
thirtiseven f9f4a8c
use gpu reader
thirtiseven e6322bc
fix a bug
thirtiseven 94f31ea
performance optimization
thirtiseven 4139c00
fix
thirtiseven 1b2fbe9
Revert "fix"
thirtiseven 310ccbc
Revert "performance optimization"
thirtiseven 81ccdfa
Revert "fix a bug"
thirtiseven 28d0405
Revert "use gpu reader"
thirtiseven dcf6af0
fix OOM bug
thirtiseven 87f5a72
performance optimzation
thirtiseven bcfcbc7
integration tests
thirtiseven 7cc02cf
splitable true by default
thirtiseven 143fc3d
logical rule
thirtiseven dc0bbfc
save a memory copy
thirtiseven 9619bc0
fix perfile config
thirtiseven d052441
GPU combine to produce larger batch
thirtiseven 98ee00f
support glob style path
thirtiseven e4fef5a
a bug fix, RDD conversion refinement
thirtiseven 0f8f8ca
support compress
thirtiseven c60c978
huge upmerge from dev branch
thirtiseven 18015d6
refactor
thirtiseven 1b297b9
Merge remote-tracking branch 'origin/main' into seq_file_reader
thirtiseven 3fe2cd6
fix scala 2.13 build
thirtiseven ed7fa84
verify and refactor
thirtiseven 05c42a6
address comments
thirtiseven 9934152
style
thirtiseven 4fab896
style refactor
thirtiseven a6120f4
verify
thirtiseven 4c3bdf6
verify back 330
thirtiseven bdc343c
address commmit
thirtiseven d8de16a
address commmit
thirtiseven 98fcaba
address commmit
thirtiseven f7cb695
address commmit
thirtiseven 9288e9f
address comments
thirtiseven 069d36c
address commmit
thirtiseven 3c0f35d
address commmit
thirtiseven 1b2621f
address commmit
thirtiseven
sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuReadSequenceFileBinaryFormat.scala
114 additions, 0 deletions
/*
 * Copyright (c) 2025, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.nvidia.spark.rapids

import com.nvidia.spark.rapids.sequencefile.GpuSequenceFileMultiFilePartitionReaderFactory
import com.nvidia.spark.rapids.sequencefile.GpuSequenceFilePartitionReaderFactory
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileStatus, Path}

import org.apache.spark.broadcast.Broadcast
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.connector.read.PartitionReaderFactory
import org.apache.spark.sql.execution.FileSourceScanExec
import org.apache.spark.sql.execution.datasources.{FileFormat, PartitionedFile}
import org.apache.spark.sql.rapids.GpuFileSourceScanExec
import org.apache.spark.sql.sources.Filter
import org.apache.spark.sql.types.StructType
import org.apache.spark.util.SerializableConfiguration

/**
 * A FileFormat that allows reading Hadoop SequenceFiles and returning raw key/value bytes as
 * Spark SQL BinaryType columns.
 *
 * This is a GPU-enabled scan format in the sense that it returns GPU-backed ColumnarBatch output
 * (the parsing itself is CPU-side IO + byte parsing).
 */
class GpuReadSequenceFileBinaryFormat extends FileFormat with GpuReadFileFormatWithMetrics {

  override def inferSchema(
      sparkSession: SparkSession,
      options: Map[String, String],
      files: Seq[FileStatus]): Option[StructType] = Some(SequenceFileBinaryFileFormat.dataSchema)

  override def isSplitable(
      sparkSession: SparkSession,
      options: Map[String, String],
      path: Path): Boolean = true

  override def buildReaderWithPartitionValuesAndMetrics(
      sparkSession: SparkSession,
      dataSchema: StructType,
      partitionSchema: StructType,
      requiredSchema: StructType,
      filters: Seq[Filter],
      options: Map[String, String],
      hadoopConf: Configuration,
      metrics: Map[String, GpuMetric]): PartitionedFile => Iterator[InternalRow] = {
    val sqlConf = sparkSession.sessionState.conf
    val broadcastedHadoopConf =
      sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf))
    val rapidsConf = new RapidsConf(sqlConf)

    val factory = GpuSequenceFilePartitionReaderFactory(
      sqlConf,
      broadcastedHadoopConf,
      requiredSchema,
      partitionSchema,
      rapidsConf,
      metrics,
      options)
    PartitionReaderIterator.buildReader(factory)
  }

  // Default to multi-file reads (recommended for many small files).
  override def isPerFileReadEnabled(conf: RapidsConf): Boolean = false

  override def createMultiFileReaderFactory(
      broadcastedConf: Broadcast[SerializableConfiguration],
      pushedFilters: Array[Filter],
      fileScan: GpuFileSourceScanExec): PartitionReaderFactory = {
    GpuSequenceFileMultiFilePartitionReaderFactory(
      fileScan.conf,
      broadcastedConf,
      fileScan.requiredSchema,
      fileScan.readPartitionSchema,
      fileScan.rapidsConf,
      fileScan.allMetrics,
      fileScan.queryUsesInputFile)
  }
}

object GpuReadSequenceFileBinaryFormat {
  def tagSupport(meta: SparkPlanMeta[FileSourceScanExec]): Unit = {
    val fsse = meta.wrapped
    val required = fsse.requiredSchema
    // Only support reading BinaryType columns named "key" and/or "value".
    required.fields.foreach { f =>
      val isKey = f.name.equalsIgnoreCase(SequenceFileBinaryFileFormat.KEY_FIELD)
      val isValue = f.name.equalsIgnoreCase(SequenceFileBinaryFileFormat.VALUE_FIELD)
      if ((isKey || isValue) && f.dataType != org.apache.spark.sql.types.BinaryType) {
        meta.willNotWorkOnGpu(
          s"SequenceFileBinary only supports BinaryType for " +
          s"'${SequenceFileBinaryFileFormat.KEY_FIELD}' and " +
          s"'${SequenceFileBinaryFileFormat.VALUE_FIELD}' columns, but saw " +
          s"${f.name}: ${f.dataType.catalogString}")
      }
    }
  }
}
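As a usage note on the file above: the read path mirrors the Scaladoc of SequenceFileBinaryFileFormat (next file), and tagSupport only lets the GPU scan take over when any requested key/value columns are BinaryType. A minimal, hypothetical sketch follows; the `spark` session and input path are placeholders, not part of this PR.

```scala
// Hypothetical usage sketch; `spark` and the input path are placeholders.
// The format class name comes from the Scaladoc of SequenceFileBinaryFileFormat below.
val df = spark.read
  .format("com.nvidia.spark.rapids.SequenceFileBinaryFileFormat")
  .load("/tmp/seq/input")

// Both columns come back as BinaryType, which is exactly what tagSupport checks
// before the plugin swaps in GpuReadSequenceFileBinaryFormat for the scan.
df.printSchema()
df.select("key", "value").show(5)
```

Requesting "key" or "value" with any other type is the case tagSupport rejects via willNotWorkOnGpu, so the scan stays on the CPU format.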
sql-plugin/src/main/scala/com/nvidia/spark/rapids/SequenceFileBinaryFileFormat.scala
227 additions, 0 deletions
/*
 * Copyright (c) 2025, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.nvidia.spark.rapids

import java.io.DataOutputStream
import java.net.URI
import java.util

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileStatus, Path}
import org.apache.hadoop.io.{DataOutputBuffer, SequenceFile}
import org.apache.hadoop.mapreduce.Job
import org.slf4j.LoggerFactory

import org.apache.spark.TaskContext
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeProjection}
import org.apache.spark.sql.execution.datasources.{FileFormat, OutputWriterFactory, PartitionedFile}
import org.apache.spark.sql.sources.{DataSourceRegister, Filter}
import org.apache.spark.sql.types.{BinaryType, StructField, StructType}
import org.apache.spark.util.SerializableConfiguration

/**
 * A Spark SQL file format that reads Hadoop SequenceFiles and returns raw bytes for key/value.
 *
 * The default inferred schema is:
 *  - key: BinaryType
 *  - value: BinaryType
 *
 * This format is intended to support protobuf payloads stored as raw bytes in the SequenceFile
 * record value bytes. It currently only supports uncompressed SequenceFiles.
 *
 * Usage:
 * {{{
 *   val df = spark.read
 *     .format("com.nvidia.spark.rapids.SequenceFileBinaryFileFormat")
 *     .load("path/to/sequencefiles")
 * }}}
 */
class SequenceFileBinaryFileFormat extends FileFormat with DataSourceRegister with Serializable {
  import SequenceFileBinaryFileFormat._

  override def shortName(): String = SHORT_NAME

  override def inferSchema(
      sparkSession: SparkSession,
      options: Map[String, String],
      files: Seq[FileStatus]): Option[StructType] = Some(dataSchema)

  override def isSplitable(
      sparkSession: SparkSession,
      options: Map[String, String],
      path: Path): Boolean = true

  override def buildReaderWithPartitionValues(
      sparkSession: SparkSession,
      dataSchema: StructType,
      partitionSchema: StructType,
      requiredSchema: StructType,
      filters: Seq[Filter],
      options: Map[String, String],
      hadoopConf: Configuration): PartitionedFile => Iterator[InternalRow] = {
    // Hadoop Configuration is not serializable; Spark will serialize the returned reader function.
    val broadcastedHadoopConf =
      sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf))
    (partFile: PartitionedFile) => {
      val filePathStr = partFile.filePath.toString
      val path = new Path(new URI(filePathStr))
      val conf = new Configuration(broadcastedHadoopConf.value.value)
      val reader =
        try {
          new SequenceFile.Reader(conf, SequenceFile.Reader.file(path))
        } catch {
          case e: Exception =>
            val msg = s"Failed to open SequenceFile reader for $path"
            LoggerFactory.getLogger(classOf[SequenceFileBinaryFileFormat]).error(msg, e)
            throw e
        }

      // For the initial version, we explicitly fail fast on compressed SequenceFiles.
      // (Record- and block-compressed files can be added later.)
      if (reader.isCompressed || reader.isBlockCompressed) {
        val compressionType = reader.getCompressionType
        val msg = s"$SHORT_NAME does not support compressed SequenceFiles " +
          s"(compressionType=$compressionType), " +
          s"file=$path, keyClass=${reader.getKeyClassName}, " +
          s"valueClass=${reader.getValueClassName}"
        LoggerFactory.getLogger(classOf[SequenceFileBinaryFileFormat]).error(msg)
        reader.close()
        throw new UnsupportedOperationException(msg)
      }

      val start = partFile.start
      val end = start + partFile.length
      if (start > 0) {
        reader.sync(start)
      }

      val reqFields = requiredSchema.fields
      val reqLen = reqFields.length
      val partLen = partitionSchema.length
      val totalLen = reqLen + partLen
      val outputSchema = StructType(requiredSchema.fields ++ partitionSchema.fields)

      val fieldInfos = reqFields.map { f =>
        if (f.name.equalsIgnoreCase(KEY_FIELD)) 1
        else if (f.name.equalsIgnoreCase(VALUE_FIELD)) 2
        else 0
      }

      val keyBuf = new DataOutputBuffer()
      val valueBytes = reader.createValueBytes()
      val valueOut = new DataOutputBuffer()
      val valueDos = new DataOutputStream(valueOut)

      // Register a task completion listener to ensure the reader is closed
      // even if the iterator is abandoned early or an exception occurs.
      Option(TaskContext.get()).foreach { tc =>
        tc.addTaskCompletionListener[Unit](_ => reader.close())
      }

      new Iterator[InternalRow] {
        private[this] val unsafeProj = UnsafeProjection.create(outputSchema)
        private[this] var nextRow: InternalRow = _
        private[this] var prepared = false
        private[this] var done = false

        override def hasNext: Boolean = {
          if (!prepared && !done) {
            prepared = true
            keyBuf.reset()
            if (reader.getPosition < end && reader.nextRaw(keyBuf, valueBytes) >= 0) {
              nextRow = buildRow()
            } else {
              done = true
              close()
            }
          }
          !done
        }

        override def next(): InternalRow = {
          if (!hasNext) {
            throw new NoSuchElementException("End of stream")
          }
          prepared = false
          val ret = nextRow
          nextRow = null
          ret
        }

        private def buildRow(): InternalRow = {
          val row = new GenericInternalRow(totalLen)
          var valueCopied = false
          var i = 0
          while (i < reqLen) {
            fieldInfos(i) match {
              case 1 =>
                val keyLen = keyBuf.getLength
                row.update(i, util.Arrays.copyOf(keyBuf.getData, keyLen))
              case 2 =>
                if (!valueCopied) {
                  valueOut.reset()
                  valueBytes.writeUncompressedBytes(valueDos)
                  valueCopied = true
                }
                val valueLen = valueOut.getLength
                row.update(i, util.Arrays.copyOf(valueOut.getData, valueLen))
              case _ =>
                row.setNullAt(i)
            }
            i += 1
          }

          // Append partition values (if any)
          var p = 0
          while (p < partLen) {
            val dt = partitionSchema.fields(p).dataType
            row.update(reqLen + p, partFile.partitionValues.get(p, dt))
            p += 1
          }
          // Spark expects UnsafeRow for downstream serialization.
          unsafeProj.apply(row).copy()
        }

        private def close(): Unit = {
          reader.close()
        }
      }
    }
  }

  override def prepareWrite(
      sparkSession: SparkSession,
      job: Job,
      options: Map[String, String],
      dataSchema: StructType): OutputWriterFactory = {
    throw new UnsupportedOperationException(
      s"${this.getClass.getCanonicalName} does not support writing")
  }
}

object SequenceFileBinaryFileFormat {
  final val SHORT_NAME: String = "sequencefilebinary"

  final val KEY_FIELD: String = "key"
  final val VALUE_FIELD: String = "value"

  final val dataSchema: StructType = StructType(Seq(
    StructField(KEY_FIELD, BinaryType, nullable = true),
    StructField(VALUE_FIELD, BinaryType, nullable = true)
  ))
}
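For local testing of this format's binary schema and its fail-fast check on compression, a small uncompressed SequenceFile can be produced with the stock Hadoop writer API. The sketch below is not code from this PR: the output path is made up and BytesWritable records are an assumption. Note that the reader above returns the raw record bytes from nextRaw, so for BytesWritable records the binary columns carry the Writable's serialized form (including its length prefix), not just the bare payload.

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{BytesWritable, SequenceFile}
import org.apache.hadoop.io.SequenceFile.CompressionType

// Sketch: write a tiny uncompressed SequenceFile that the format above can scan.
// CompressionType.NONE matters because the reader throws
// UnsupportedOperationException for compressed files.
val conf = new Configuration()
val writer = SequenceFile.createWriter(
  conf,
  SequenceFile.Writer.file(new Path("/tmp/seq/input/part-00000.seq")),  // hypothetical path
  SequenceFile.Writer.keyClass(classOf[BytesWritable]),
  SequenceFile.Writer.valueClass(classOf[BytesWritable]),
  SequenceFile.Writer.compression(CompressionType.NONE))
try {
  (0 until 10).foreach { i =>
    writer.append(
      new BytesWritable(s"key-$i".getBytes("UTF-8")),
      new BytesWritable(s"value-$i".getBytes("UTF-8")))
  }
} finally {
  writer.close()
}
```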