Skip to content

Commit 0c0648e

Browse files
Fixed decoding of huge JSON data for okio streams
Fixes #2006
Co-authored-by: Leonid Startsev <[email protected]>
1 parent 83b6e33 commit 0c0648e

File tree

3 files changed

+71
-23
lines changed

3 files changed

+71
-23
lines changed

formats/json-okio/commonMain/src/kotlinx/serialization/json/okio/internal/OkioJsonStreams.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ internal class OkioSerialReader(private val source: BufferedSource): SerialReade
5050
override fun read(buffer: CharArray, bufferOffset: Int, count: Int): Int {
5151
var i = 0
5252
while (i < count && !source.exhausted()) {
53-
buffer[i] = source.readUtf8CodePoint().toChar()
53+
buffer[bufferOffset + i] = source.readUtf8CodePoint().toChar()
5454
i++
5555
}
5656
return if (i > 0) i else -1
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* Copyright 2017-2022 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
3+
*/
4+
5+
package kotlinx.serialization.json
6+
7+
import kotlinx.serialization.Serializable
8+
import kotlin.test.Test
9+
10+
class JsonHugeDataSerializationTest : JsonTestBase() {
11+
12+
@Serializable
13+
private data class Node(
14+
val children: List<Node>
15+
)
16+
17+
private fun createNodes(count: Int, depth: Int): List<Node> {
18+
val ret = mutableListOf<Node>()
19+
if (depth == 0) return ret
20+
for (i in 0 until count) {
21+
ret.add(Node(createNodes(1, depth - 1)))
22+
}
23+
return ret
24+
}
25+
26+
@Test
27+
fun test() {
28+
// create some huge instance
29+
val rootNode = Node(createNodes(1000, 10))
30+
31+
val expectedJson = Json.encodeToString(Node.serializer(), rootNode)
32+
33+
/*
34+
When checking the encoding for `JsonTestingMode.STREAMING`, the assertJsonFormAndRestored function calls Json.encodeToString(...).
35+
Because the string `expectedJson` was generated by that same function, the test will always consider
36+
the encoding in `STREAMING` mode to be correct, even if there was actually an error there. So only TREE, JAVA_STREAMS and OKIO are actually being tested here.
37+
*/
38+
assertJsonFormAndRestored(Node.serializer(), rootNode, expectedJson)
39+
}
40+
}

formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/JsonLexer.kt

Lines changed: 30 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,23 +14,31 @@ private const val DEFAULT_THRESHOLD = 128
1414
* For some reason this hand-rolled implementation is faster than
1515
* fun ArrayAsSequence(s: CharArray): CharSequence = java.nio.CharBuffer.wrap(s, 0, length)
1616
*/
17-
private class ArrayAsSequence(private val source: CharArray) : CharSequence {
18-
override val length: Int = source.size
17+
internal class ArrayAsSequence(val buffer: CharArray) : CharSequence {
18+
override var length: Int = buffer.size
1919

20-
override fun get(index: Int): Char = source[index]
20+
override fun get(index: Int): Char = buffer[index]
2121

2222
override fun subSequence(startIndex: Int, endIndex: Int): CharSequence {
23-
return source.concatToString(startIndex, endIndex)
23+
return buffer.concatToString(startIndex, minOf(endIndex, length))
24+
}
25+
26+
fun substring(startIndex: Int, endIndex: Int): String {
27+
return buffer.concatToString(startIndex, minOf(endIndex, length))
28+
}
29+
30+
fun trim(newSize: Int) {
31+
length = minOf(buffer.size, newSize)
2432
}
2533
}
2634

2735
internal class ReaderJsonLexer(
2836
private val reader: SerialReader,
29-
private var _source: CharArray = CharArray(BATCH_SIZE)
37+
charsBuffer: CharArray = CharArray(BATCH_SIZE)
3038
) : AbstractJsonLexer() {
3139
private var threshold: Int = DEFAULT_THRESHOLD // chars
3240

33-
override var source: CharSequence = ArrayAsSequence(_source)
41+
override val source: ArrayAsSequence = ArrayAsSequence(charsBuffer)
3442

3543
init {
3644
preload(0)
@@ -65,22 +73,22 @@ internal class ReaderJsonLexer(
6573
return false
6674
}
6775

68-
private fun preload(spaceLeft: Int) {
69-
val buffer = _source
70-
buffer.copyInto(buffer, 0, currentPosition, currentPosition + spaceLeft)
71-
var read = spaceLeft
72-
val sizeTotal = _source.size
73-
while (read != sizeTotal) {
74-
val actual = reader.read(buffer, read, sizeTotal - read)
76+
private fun preload(unprocessedCount: Int) {
77+
val buffer = source.buffer
78+
if (unprocessedCount != 0) {
79+
buffer.copyInto(buffer, 0, currentPosition, currentPosition + unprocessedCount)
80+
}
81+
var filledCount = unprocessedCount
82+
val sizeTotal = source.length
83+
while (filledCount != sizeTotal) {
84+
val actual = reader.read(buffer, filledCount, sizeTotal - filledCount)
7585
if (actual == -1) {
7686
// EOF, trimming the logical length of the source so it matches the input size (the backing array is not reallocated)
77-
// Can also be done by extracting source.length to a separate var
78-
_source = _source.copyOf(read)
79-
source = ArrayAsSequence(_source)
87+
source.trim(filledCount)
8088
threshold = -1
8189
break
8290
}
83-
read += actual
91+
filledCount += actual
8492
}
8593
currentPosition = 0
8694
}
@@ -115,7 +123,7 @@ internal class ReaderJsonLexer(
115123

116124
override fun ensureHaveChars() {
117125
val cur = currentPosition
118-
val oldSize = _source.size
126+
val oldSize = source.length
119127
val spaceLeft = oldSize - cur
120128
if (spaceLeft > threshold) return
121129
// warning: current position is not updated during string consumption
@@ -152,19 +160,19 @@ internal class ReaderJsonLexer(
152160
}
153161

154162
override fun indexOf(char: Char, startPos: Int): Int {
155-
val src = _source
156-
for (i in startPos until src.size) {
163+
val src = source
164+
for (i in startPos until src.length) {
157165
if (src[i] == char) return i
158166
}
159167
return -1
160168
}
161169

162170
override fun substring(startPos: Int, endPos: Int): String {
163-
return _source.concatToString(startPos, endPos)
171+
return source.substring(startPos, endPos)
164172
}
165173

166174
override fun appendRange(fromIndex: Int, toIndex: Int) {
167-
escapedString.appendRange(_source, fromIndex, toIndex)
175+
escapedString.appendRange(source.buffer, fromIndex, toIndex)
168176
}
169177

170178
// Can be carefully implemented but postponed for now

0 commit comments

Comments
 (0)