Skip to content
This repository was archived by the owner on Mar 24, 2025. It is now read-only.

Commit f28f1d2

Browse files
authored
Take into account StreamDecoder.haveLeftoverChar when determining exactly how many bytes have been read (#468)
1 parent 68b92b3 commit f28f1d2

File tree

1 file changed

+7
-1
lines changed

1 file changed

+7
-1
lines changed

src/main/scala/com/databricks/spark/xml/XmlInputFormat.scala

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ private[xml] class XmlRecordReader extends RecordReader[LongWritable, Text] {
6565
private var reader: Reader = _
6666
private var filePosition: Seekable = _
6767
private var countingIn: CountingInputStream = _
68+
private var readerLeftoverCharFn: () => Boolean = _
6869
private var readerByteBuffer: ByteBuffer = _
6970
private var decompressor: Decompressor = _
7071
private var buffer = new StringBuilder()
@@ -127,6 +128,9 @@ private[xml] class XmlRecordReader extends RecordReader[LongWritable, Text] {
127128
val sdField = reader.getClass.getDeclaredField("sd")
128129
sdField.setAccessible(true)
129130
val sd = sdField.get(reader)
131+
val readerLeftoverCharField = sd.getClass.getDeclaredField("haveLeftoverChar")
132+
readerLeftoverCharField.setAccessible(true)
133+
readerLeftoverCharFn = () => { readerLeftoverCharField.get(sd).asInstanceOf[Boolean] }
130134
val bbField = sd.getClass.getDeclaredField("bb")
131135
bbField.setAccessible(true)
132136
readerByteBuffer = bbField.get(sd).asInstanceOf[ByteBuffer]
@@ -149,7 +153,9 @@ private[xml] class XmlRecordReader extends RecordReader[LongWritable, Text] {
149153
if (filePosition != null) {
150154
filePosition.getPos
151155
} else {
152-
start + countingIn.getByteCount - readerByteBuffer.remaining()
156+
start + countingIn.getByteCount -
157+
readerByteBuffer.remaining() -
158+
(if (readerLeftoverCharFn()) 1 else 0)
153159
}
154160
}
155161

0 commit comments

Comments
 (0)