@@ -96,23 +96,37 @@ private[sequencefile] final class HostBinaryListBufferer(
9696 }
9797
/**
 * Appends one row whose payload is `len` bytes of `bytes`, starting at `offset`.
 *
 * The row's start offset is stored only after the payload copy has returned, so a
 * failed copy does not leave an offset entry that points at unwritten data.
 *
 * @param bytes  source array holding the payload
 * @param offset index in `bytes` of the first byte to copy
 * @param len    number of bytes to copy
 * @throws IllegalStateException if the data end would pass `Int.MaxValue`
 */
def addBytes(bytes: Array[Byte], offset: Int, len: Int): Unit = {
  val rowStart = dataLocation
  // NOTE(review): assumes dataLocation is a Long, so this sum cannot wrap - confirm.
  val newEnd = rowStart + len
  if (newEnd > Int.MaxValue) {
    throw new IllegalStateException(
      s"Binary column child size $newEnd would exceed INT32 offset limit")
  }

  // Make room in both buffers before touching either of them.
  growOffsetsIfNeeded()
  growDataIfNeeded(newEnd)

  // Copy the payload first and advance the write cursor.
  dataBuffer.setBytes(rowStart, bytes, offset, len)
  dataLocation = newEnd

  // Then publish the row: one INT32 offset slot per row, indexed by the row count.
  val offsetSlot = numRows.toLong * DType.INT32.getSizeInBytes
  offsetsBuffer.setInt(offsetSlot, rowStart.toInt)
  numRows += 1
}
107114
/**
 * Appends one row by streaming the uncompressed form of `valueBytes` into the data buffer.
 *
 * The row's start offset is stored only after the payload write has returned, so a
 * failed write does not leave an offset entry that points at unwritten data.
 *
 * The INT32 limit is checked twice: before the write, against the caller-supplied
 * `len`, and after it, against the stream position that is actually committed.
 * Without the second check, a write longer than `len` could push the cursor past
 * `Int.MaxValue`, and the next row's `.toInt` offset would be silently truncated.
 *
 * @param valueBytes the SequenceFile value to append
 * @param len        expected number of bytes the value occupies once written
 * @throws IllegalStateException if the expected or actual data end would pass `Int.MaxValue`
 */
def addValueBytes(valueBytes: SequenceFile.ValueBytes, len: Int): Unit = {
  // NOTE(review): assumes dataLocation is a Long, so this sum cannot wrap - confirm.
  val newEnd = dataLocation + len
  if (newEnd > Int.MaxValue) {
    throw new IllegalStateException(
      s"Binary column child size $newEnd would exceed INT32 offset limit")
  }
  growOffsetsIfNeeded()
  growDataIfNeeded(newEnd)
  val offsetPosition = numRows.toLong * DType.INT32.getSizeInBytes
  val startDataLocation = dataLocation
  out.seek(dataLocation)
  valueBytes.writeUncompressedBytes(dos)
  // Validate the position that was really reached before committing it; on failure
  // dataLocation and numRows are left unchanged, so the partial row is not published.
  val writtenEnd = out.getPos
  if (writtenEnd > Int.MaxValue) {
    throw new IllegalStateException(
      s"Binary column child size $writtenEnd would exceed INT32 offset limit")
  }
  dataLocation = writtenEnd
  // Write offset only after successful data write
  offsetsBuffer.setInt(offsetPosition, startDataLocation.toInt)
  numRows += 1
}
118132
@@ -149,6 +163,9 @@ private[sequencefile] final class HostBinaryListBufferer(
149163 }
150164 }
151165 offsetsBuffer = null
166+ // The stream wrappers (out, dos) don't hold independent resources - they just wrap the
167+ // dataBuffer which is now owned by childHost. Setting to null without close() is intentional
168+ // to avoid attempting operations on the transferred buffer.
152169 out = null
153170 dos = null
154171
@@ -327,7 +344,7 @@ class SequenceFilePartitionReader(
327344 val recBytes = recordBytes(keyLen, valueLen)
328345
329346 // If this record doesn't fit, keep it for the next batch (unless it's the first row)
330- if (rows > 0 && recBytes > 0 && bytes + recBytes > maxBytesPerBatch) {
347+ if (rows > 0 && bytes + recBytes > maxBytesPerBatch) {
331348 pending = Some (makePending(keyLen, valueLen))
332349 keepReading = false
333350 } else {
0 commit comments