Skip to content

Commit 637e0f7

Browse files
aajtoddkiiadi
andauthored
feat(rt): replace GSON based JSON serde with KMP compatible impl (#477)
Co-authored-by: Aaron Todd <[email protected]> Co-authored-by: Kyle Thomson <[email protected]>
1 parent 76f754d commit 637e0f7

File tree

21 files changed

+1709
-516
lines changed

21 files changed

+1709
-516
lines changed

runtime/io/common/src/aws/smithy/kotlin/runtime/io/SdkByteReadChannel.kt

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
*/
55
package aws.smithy.kotlin.runtime.io
66

7+
import aws.smithy.kotlin.runtime.util.text.byteCountUtf8
78
import io.ktor.utils.io.*
89
import io.ktor.utils.io.core.*
910

@@ -160,3 +161,33 @@ internal suspend fun SdkByteReadChannel.readAvailableFallback(dest: SdkByteBuffe
160161
dest.writeFully(tmp)
161162
return tmp.size.toLong()
162163
}
164+
165+
/**
 * Decodes the next UTF-8 encoded code point from this channel.
 *
 * Waits for content, reads the lead byte and asks [byteCountUtf8] how many bytes the sequence
 * occupies. Each following byte must have the bit pattern `10xxxxxx`; its low six bits are
 * appended to the payload bits taken from the lead byte.
 *
 * @return the decoded code point, or `null` when no bytes are available and the channel is closed for reading
 * @throws IllegalStateException if [byteCountUtf8] reports a length other than 1-4 for the lead byte,
 * if a following byte is not of the form `10xxxxxx`, or if the channel is exhausted mid-sequence
 */
suspend fun SdkByteReadChannel.readUtf8CodePoint(): Int? {
    awaitContent()
    if (availableForRead == 0 && isClosedForRead) return null

    val lead = readByte()
    val length = byteCountUtf8(lead)
    val leadBits = lead.toInt()

    // strip the length-marker prefix from the lead byte, keeping only its payload bits
    var codePoint = when (length) {
        1 -> leadBits
        2 -> leadBits and 0x1f
        3 -> leadBits and 0x0f
        4 -> leadBits and 0x07
        else -> error("Invalid UTF-8 start sequence: $lead")
    }

    // number of successor bytes that still have to be consumed
    var remaining = length - 1
    while (remaining > 0) {
        awaitContent()
        check(availableForRead != 0 || !isClosedForRead) { "unexpected EOF: expected $remaining bytes" }

        val next = readByte()
        val nextBits = next.toInt()
        check((nextBits and 0xc0) == 0x80) { "invalid UTF-8 successor byte: $next" }

        // each successor byte contributes six more bits to the code point
        codePoint = (codePoint shl 6) or (nextBits and 0x3f)
        remaining--
    }

    return codePoint
}

runtime/io/common/test/aws/smithy/kotlin/runtime/io/SdkByteChannelOpsTest.kt

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,4 +131,36 @@ class SdkByteChannelOpsTest {
131131
yield()
132132
assertFalse(awaitingContent)
133133
}
134+
135+
@Test
fun testReadUtf8Chars() = runSuspendTest {
    val text = "hello"
    val chan = SdkByteReadChannel(text.encodeToByteArray())

    // each character of the input must be decoded back, one code point per read
    for (ch in text) {
        assertEquals(ch, chan.readUtf8CodePoint()?.toChar())
    }

    // once every byte has been consumed the channel reports end of input
    assertNull(chan.readUtf8CodePoint())
}
145+
146+
@Test
fun testReadMultibyteUtf8Chars(): Unit = runSuspendTest {
    // One character for each UTF-8 encoded length:
    // dollar sign - 1 byte, cent sign - 2 bytes, euro sign - 3 bytes,
    // musical F clef - 4 bytes (written as a surrogate pair in the string literal)
    // https://www.fileformat.info/info/unicode/char/1d122/index.htm
    val content = "$¢€\uD834\uDD22"
    val chan = SdkByteReadChannel(content.encodeToByteArray())

    val expected = listOf(
        36, // $
        162, // ¢
        8364, // €
        119074 // musical F clef
    )

    for ((i, exp) in expected.withIndex()) {
        val code = chan.readUtf8CodePoint()
        assertEquals(exp, code, "[i=$i] expected $exp, found $code ")
    }

    // nothing is left after the four code points have been read
    assertNull(chan.readUtf8CodePoint())
}
134166
}

runtime/serde/build.gradle.kts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ kotlin {
1919
}
2020
}
2121

22-
subprojects {
22+
allprojects {
2323
kotlin {
2424
sourceSets {
2525
commonTest {
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
How to run JSONTestSuite against serde-json deserialize
2+
========================================================
3+
4+
When making changes to the lexer it is a good idea to run the
5+
changes against the [JSONTestSuite](https://github.com/nst/JSONTestSuite) and manually examine the test results.
6+
7+
### How to set up the JSONTestSuite
8+
9+
1. Clone the [JSONTestSuite](https://github.com/nst/JSONTestSuite) repository.
10+
2. In `JSONTestSuite/parsers`, create a new Gradle JVM application project named `test_smithy_kotlin`.
11+
3. Add the following `build.gradle.kts` file:
12+
13+
```kotlin
14+
plugins {
15+
kotlin("jvm") version "1.5.30"
16+
application
17+
id("com.github.johnrengelman.shadow") version "7.0.0"
18+
}
19+
20+
application {
21+
mainClass.set("aws.smithy.kotlin.jsontest.MainKt")
22+
}
23+
24+
allprojects {
25+
repositories {
26+
mavenLocal()
27+
mavenCentral()
28+
}
29+
}
30+
31+
32+
// NOTE: set to whatever locally published version you are working on
33+
val smithyKotlinVersion: String = "0.4.1-kmp-json"
34+
dependencies {
35+
implementation(kotlin("stdlib"))
36+
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.5.0")
37+
implementation("aws.smithy.kotlin:serde-json:$smithyKotlinVersion")
38+
implementation("aws.smithy.kotlin:utils:$smithyKotlinVersion")
39+
}
40+
41+
tasks.jar {
42+
manifest {
43+
attributes["Main-Class"] = "aws.smithy.kotlin.jsontest.MainKt"
44+
}
45+
}
46+
```
47+
48+
4. Add the following code to `src/main/kotlin/Main.kt`:
49+
50+
```kotlin
51+
package aws.smithy.kotlin.jsontest
52+
53+
import kotlinx.coroutines.runBlocking
54+
import kotlin.system.exitProcess
55+
import java.io.IOException
56+
import java.nio.file.Files
57+
import java.nio.file.Paths
58+
import aws.smithy.kotlin.runtime.serde.json.JsonToken
59+
import aws.smithy.kotlin.runtime.serde.json.jsonStreamReader
60+
import aws.smithy.kotlin.runtime.util.InternalApi
61+
62+
63+
/**
 * Tokenizes [bytes] with the serde-json stream reader and reports whether the input should be
 * counted as valid JSON by the test suite.
 *
 * @param bytes the raw document to tokenize
 * @return `true` when the whole input tokenizes and is not an unterminated array or object;
 * `false` when tokenizing throws, when the input yields no tokens besides `EndDocument`, or when
 * the document starts an array/object that is not closed by the final token
 */
@OptIn(InternalApi::class)
suspend fun isValidJson(bytes: ByteArray): Boolean {
    val lexer = jsonStreamReader(bytes)
    println(lexer::class.qualifiedName)
    return try {
        val tokens = mutableListOf<JsonToken>()
        do {
            val token = lexer.nextToken()
            tokens.add(token)
        } while (token != JsonToken.EndDocument)

        // The loop only exits once EndDocument has been recorded, so the last entry is always
        // EndDocument; drop it to look at the document's own tokens.
        val content = tokens.dropLast(1)

        // An input that produced nothing but EndDocument is not a JSON value. Reject it explicitly
        // instead of relying on first() throwing into the catch block below.
        val first = content.firstOrNull() ?: return false
        val last = content.last()

        // The test suite includes incomplete objects and arrays (e.g. "[null,")
        // These are completely valid for this parser since it's just a tokenizer
        // and doesn't attempt to make semantic meaning from the input.
        // We'll just pretend to fail to satisfy the test suite
        when (first) {
            JsonToken.BeginArray -> last == JsonToken.EndArray
            JsonToken.BeginObject -> last == JsonToken.EndObject
            else -> true
        }
    } catch (ex: Exception) {
        println(ex)
        false
    }
}
92+
93+
/**
 * Entry point used by the JSONTestSuite runner.
 *
 * Reads the file named by the first argument, validates it with [isValidJson] and reports the
 * outcome through the process exit code: `0` for valid input, `1` for invalid input and `2` when
 * no argument was given or the file could not be read.
 */
fun main(args: Array<String>): Unit = runBlocking {
    val path = args.firstOrNull()
    if (path == null) {
        println("Usage: java TestJSONParsing file.json")
        exitProcess(2)
    }

    try {
        val data = Files.readAllBytes(Paths.get(path))
        val valid = isValidJson(data)
        println(if (valid) "valid" else "invalid")
        exitProcess(if (valid) 0 else 1)
    } catch (ex: IOException) {
        println(ex)
        println("not found")
        exitProcess(2)
    }
}
113+
```
114+
115+
5. Compile this program with `./gradlew build`.
116+
NOTE: Be sure to publish all of `smithy-kotlin` "runtime" to maven local. It is helpful to just choose a unique version
117+
to be sure that everything is wired up correctly.
118+
6. Modify `JSONTestSuite/run_tests.py` so that the `programs` dictionary only contains this one entry:
119+
120+
```
121+
programs = {
122+
"SmithyKotlin":
123+
{
124+
"url":"",
125+
"commands":["java" , "-jar", os.path.join(PARSERS_DIR, "test_smithy_kotlin/build/libs/test_smithy_kotlin-all.jar")]
126+
}
127+
}
128+
```
129+
130+
7. Run `run_tests.py` and examine the output with a web browser by opening `JSONTestSuite/results/parsing.html`.
131+
132+
### Examining the results
133+
134+
When looking at `JSONTestSuite/results/parsing.html`, there is a matrix of test cases against their
135+
results with a legend at the top.
136+
137+
Any test result marked with blue or light blue is for a test case where correct behavior isn't specified,
138+
so use your best judgement to decide if it should have succeeded or failed.
139+
140+
The other colors are bad and should be carefully examined. At time of writing, the following test cases
141+
succeed when they should fail, and we intentionally left it that way since we're not currently concerned
142+
about being more lenient in the number parsing:
143+
144+
```
145+
146+
n_number_-01.json [-01]
147+
n_number_-2..json [-2.]
148+
n_number_.2e-3.json [.2e-3]
149+
n_number_0.3e+.json [0.3e+]
150+
n_number_0.3e.json [0.3e]
151+
n_number_0.e1.json [0.e1]
152+
n_number_0_capital_E+.json [0E+]
153+
n_number_0_capital_E.json [0E]
154+
n_number_0e+.json [0e+]
155+
n_number_0e.json [0e]
156+
n_number_1.0e+.json [1.0e+]
157+
n_number_1.0e-.json [1.0e-]
158+
n_number_1.0e.json [1.0e]
159+
n_number_2.e+3.json [2.e+3]
160+
n_number_2.e-3.json [2.e-3]
161+
n_number_2.e3.json [2.e3]
162+
n_number_9.e+.json [9.e+]
163+
n_number_neg_int_starting_with_zero.json [-012]
164+
n_number_neg_real_without_int_part.json [-.123]
165+
n_number_real_without_fractional_part.json [1.]
166+
n_number_starting_with_dot.json [.123]
167+
n_number_with_leading_zero.json [012]
168+
```
169+
170+
171+
172+
These test cases succeed with our parser and that's OK since we're
173+
a token streaming parser (multiple values are allowed):
174+
```
175+
n_array_just_minus.json [-]
176+
n_structure_double_array.json [][]
177+
n_structure_whitespace_formfeed.json [0C] <=> [ ]
178+
```

runtime/serde/serde-json/common/src/aws/smithy/kotlin/runtime/serde/json/JsonDeserializer.kt

Lines changed: 13 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ package aws.smithy.kotlin.runtime.serde.json
77
import aws.smithy.kotlin.runtime.serde.*
88

99
/**
10-
* Provides a deserialiser for JSON documents
10+
* Provides a deserializer for JSON documents
1111
*
1212
* @param payload underlying document from which tokens are read
1313
*/
@@ -65,11 +65,11 @@ class JsonDeserializer(payload: ByteArray) : Deserializer, Deserializer.ElementI
6565

6666
override suspend fun deserializeStruct(descriptor: SdkObjectDescriptor): Deserializer.FieldIterator =
6767
when (reader.peek()) {
68-
RawJsonToken.BeginObject -> {
68+
JsonToken.BeginObject -> {
6969
reader.nextTokenOf<JsonToken.BeginObject>()
7070
JsonFieldIterator(reader, descriptor, this)
7171
}
72-
RawJsonToken.Null -> JsonNullFieldIterator(this)
72+
JsonToken.Null -> JsonNullFieldIterator(this)
7373
else -> throw DeserializationException("Unexpected token type ${reader.peek()}")
7474
}
7575

@@ -88,28 +88,28 @@ class JsonDeserializer(payload: ByteArray) : Deserializer, Deserializer.ElementI
8888
return token.value
8989
}
9090

91-
override suspend fun nextHasValue(): Boolean = reader.peek() != RawJsonToken.Null
91+
override suspend fun nextHasValue(): Boolean = reader.peek() != JsonToken.Null
9292

9393
override suspend fun hasNextEntry(): Boolean =
9494
when (reader.peek()) {
95-
RawJsonToken.EndObject -> {
95+
JsonToken.EndObject -> {
9696
// consume the token
9797
reader.nextTokenOf<JsonToken.EndObject>()
9898
false
9999
}
100-
RawJsonToken.Null,
101-
RawJsonToken.EndDocument -> false
100+
JsonToken.Null,
101+
JsonToken.EndDocument -> false
102102
else -> true
103103
}
104104

105105
override suspend fun hasNextElement(): Boolean =
106106
when (reader.peek()) {
107-
RawJsonToken.EndArray -> {
107+
JsonToken.EndArray -> {
108108
// consume the token
109109
reader.nextTokenOf<JsonToken.EndArray>()
110110
false
111111
}
112-
RawJsonToken.EndDocument -> false
112+
JsonToken.EndDocument -> false
113113
else -> true
114114
}
115115
}
@@ -131,13 +131,13 @@ private class JsonFieldIterator(
131131

132132
override suspend fun findNextFieldIndex(): Int? {
133133
val candidate = when (reader.peek()) {
134-
RawJsonToken.EndObject -> {
134+
JsonToken.EndObject -> {
135135
// consume the token
136136
reader.nextTokenOf<JsonToken.EndObject>()
137137
null
138138
}
139-
RawJsonToken.EndDocument -> null
140-
RawJsonToken.Null -> {
139+
JsonToken.EndDocument -> null
140+
JsonToken.Null -> {
141141
reader.nextTokenOf<JsonToken.Null>()
142142
null
143143
}
@@ -151,7 +151,7 @@ private class JsonFieldIterator(
151151

152152
if (candidate != null) {
153153
// found a field
154-
if (reader.peek() == RawJsonToken.Null) {
154+
if (reader.peek() == JsonToken.Null) {
155155
// skip explicit nulls
156156
reader.nextTokenOf<JsonToken.Null>()
157157
return findNextFieldIndex()
@@ -166,17 +166,3 @@ private class JsonFieldIterator(
166166
reader.skipNext()
167167
}
168168
}
169-
170-
// return the next token and require that it be of type [TExpected] or else throw an exception
171-
private suspend inline fun <reified TExpected : JsonToken> JsonStreamReader.nextTokenOf(): TExpected {
172-
val token = this.nextToken()
173-
requireToken<TExpected>(token)
174-
return token as TExpected
175-
}
176-
177-
// require that the given token be of type [TExpected] or else throw an exception
178-
private inline fun <reified TExpected> requireToken(token: JsonToken) {
179-
if (token::class != TExpected::class) {
180-
throw DeserializationException("expected ${TExpected::class}; found ${token::class}")
181-
}
182-
}

0 commit comments

Comments
 (0)