Skip to content

Commit fdcd389

Browse files
committed
Added Jackson JSON parser extension. Regenerated JsonParser using the new LLkParserGenerator. Fixed toCms unicode bug.
1 parent ee617da commit fdcd389

File tree

10 files changed

+652
-1505
lines changed

10 files changed

+652
-1505
lines changed

bin/project.cmd

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ val (libraryShared, libraryJvm) = moduleSharedJvmPub(
7575
"org.sireum:presentasi-jfx:",
7676
"org.scalameta::scalafmt-cli:",
7777
"org.apache.commons:commons-compress:",
78-
"it.unimi.dsi:fastutil-core:"
78+
"it.unimi.dsi:fastutil-core:",
79+
"org.commonmark:commonmark-ext-yaml-front-matter:"
7980
),
8081
pubOpt = pub(
8182
desc = "Slang Runtime Library",
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// #Sireum
2+
/*
3+
Copyright (c) 2017-2026,Robby, Kansas State University
4+
All rights reserved.
5+
6+
Redistribution and use in source and binary forms, with or without
7+
modification, are permitted provided that the following conditions are met:
8+
9+
1. Redistributions of source code must retain the above copyright notice, this
10+
list of conditions and the following disclaimer.
11+
2. Redistributions in binary form must reproduce the above copyright notice,
12+
this list of conditions and the following disclaimer in the documentation
13+
and/or other materials provided with the distribution.
14+
15+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19+
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22+
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25+
*/
26+
package org.sireum
27+
28+
// Slang `@ext` object: `parse` is declared here with the placeholder body `$`.
// NOTE(review): presumably bound to `JacksonJsonParser_Ext.parse`, which has the same signature — confirm.
@ext object JacksonJsonParser {
29+
// Takes an optional source URI and the JSON text, and returns a `parser.json.AST`.
def parse(uriOpt: Option[String], content: String): parser.json.AST = $
30+
}
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
/*
2+
Copyright (c) 2017-2026,Robby, Kansas State University
3+
All rights reserved.
4+
5+
Redistribution and use in source and binary forms, with or without
6+
modification, are permitted provided that the following conditions are met:
7+
8+
1. Redistributions of source code must retain the above copyright notice, this
9+
list of conditions and the following disclaimer.
10+
2. Redistributions in binary form must reproduce the above copyright notice,
11+
this list of conditions and the following disclaimer in the documentation
12+
and/or other materials provided with the distribution.
13+
14+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
15+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
18+
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
21+
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24+
*/
25+
package org.sireum
26+
27+
import com.fasterxml.jackson.core.{JsonFactory, JsonParser => JacksonParser, JsonToken}
28+
29+
/** Jackson-based JSON parsing that produces `parser.json.AST` nodes.
  *
  * The input is consumed with Jackson's streaming `JsonParser`; each token is
  * converted to the corresponding AST node together with a `message.FlatPos`
  * built from Jackson's token/current locations.
  */
object JacksonJsonParser_Ext {

  // One factory instance is shared by every `parse` call.
  private val factory: JsonFactory = new JsonFactory()

  /** Parses the first JSON value found in `content`.
    *
    * @param uriOpt  optional source URI stored in every node position
    * @param content JSON text to parse
    * @return the AST of the first JSON value in `content`; anything following
    *         that value is not inspected
    */
  def parse(uriOpt: Option[String], content: String): parser.json.AST = {
    val jp = factory.createParser(content.value)
    // Release the Jackson parser even when tokenizing or `halt` throws part-way through.
    try {
      jp.nextToken()
      parseValue(uriOpt, jp)
    } finally {
      jp.close()
    }
  }

  /** Builds the position of the parser's current token.
    *
    * The start comes from `currentTokenLocation()`, the end from `currentLocation()`.
    */
  private def posOpt(uriOpt: Option[String], jp: JacksonParser): Option[message.Position] = {
    // Jackson may process a token lazily; `finishToken()` forces full handling of the
    // current token, which its Javadoc describes as the way to make the token end
    // location available before `currentLocation()` is read.
    // NOTE(review): for FIELD_NAME tokens the parser may already have scanned ahead of the
    // name, so the end of a key position may still be approximate — confirm if exactness matters.
    jp.finishToken()
    val loc = jp.currentTokenLocation()
    val line = loc.getLineNr
    val col = loc.getColumnNr
    val offset = loc.getCharOffset
    val endLoc = jp.currentLocation()
    val endLine = endLoc.getLineNr
    val endCol = endLoc.getColumnNr
    val endOffset = endLoc.getCharOffset
    val length = endOffset - offset
    Some(message.FlatPos(
      uriOpt = uriOpt,
      beginLine32 = conversions.Z.toU32(line),
      beginColumn32 = conversions.Z.toU32(col),
      endLine32 = conversions.Z.toU32(endLine),
      endColumn32 = conversions.Z.toU32(endCol),
      offset32 = conversions.Z.toU32(offset),
      length32 = conversions.Z.toU32(length)
    ))
  }

  /** Converts the value starting at the parser's current token into an AST node.
    *
    * On return the current token is the last token of the converted value.
    * Halts with "Unexpected JSON token" for any token that cannot start a value
    * (including the `null` token reported for empty input).
    */
  private def parseValue(uriOpt: Option[String], jp: JacksonParser): parser.json.AST = {
    jp.currentToken() match {
      case JsonToken.START_OBJECT => parseObject(uriOpt, jp)
      case JsonToken.START_ARRAY => parseArray(uriOpt, jp)
      case JsonToken.VALUE_STRING =>
        val pOpt = posOpt(uriOpt, jp)
        parser.json.AST.Str(String(jp.getText), pOpt)
      case JsonToken.VALUE_NUMBER_INT =>
        val pOpt = posOpt(uriOpt, jp)
        // The integer is rebuilt from its text form rather than a fixed-width Java value.
        parser.json.AST.Int(Z.$String(jp.getText), pOpt)
      case JsonToken.VALUE_NUMBER_FLOAT =>
        val pOpt = posOpt(uriOpt, jp)
        parser.json.AST.Dbl(F64(jp.getDoubleValue), pOpt)
      case JsonToken.VALUE_TRUE =>
        val pOpt = posOpt(uriOpt, jp)
        parser.json.AST.Bool(T, pOpt)
      case JsonToken.VALUE_FALSE =>
        val pOpt = posOpt(uriOpt, jp)
        parser.json.AST.Bool(F, pOpt)
      case JsonToken.VALUE_NULL =>
        val pOpt = posOpt(uriOpt, jp)
        parser.json.AST.Null(pOpt)
      case token => halt(s"Unexpected JSON token: $token")
    }
  }

  /** Converts an object whose START_OBJECT token is current; returns with END_OBJECT current.
    *
    * The object's own position is the one computed at its START_OBJECT token.
    */
  private def parseObject(uriOpt: Option[String], jp: JacksonParser): parser.json.AST.Obj = {
    val pOpt = posOpt(uriOpt, jp)
    var keyValues = ISZ[parser.json.AST.KeyValue]()
    jp.nextToken()
    while (jp.currentToken() != JsonToken.END_OBJECT) {
      // The current token is the member name; its text becomes the key node.
      val keyPosOpt = posOpt(uriOpt, jp)
      val key = parser.json.AST.Str(String(jp.getText), keyPosOpt)
      jp.nextToken()
      val value = parseValue(uriOpt, jp)
      keyValues = keyValues :+ parser.json.AST.KeyValue(key, value)
      // Step past the value's last token to the next member name or END_OBJECT.
      jp.nextToken()
    }
    parser.json.AST.Obj(keyValues, pOpt)
  }

  /** Converts an array whose START_ARRAY token is current; returns with END_ARRAY current.
    *
    * The array's own position is the one computed at its START_ARRAY token.
    */
  private def parseArray(uriOpt: Option[String], jp: JacksonParser): parser.json.AST.Arr = {
    val pOpt = posOpt(uriOpt, jp)
    var values = ISZ[parser.json.AST]()
    jp.nextToken()
    while (jp.currentToken() != JsonToken.END_ARRAY) {
      values = values :+ parseValue(uriOpt, jp)
      // Step past the element's last token to the next element or END_ARRAY.
      jp.nextToken()
    }
    parser.json.AST.Arr(values, pOpt)
  }
}
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
/*
2+
Copyright (c) 2017-2026,Robby, Kansas State University
3+
All rights reserved.
4+
5+
Redistribution and use in source and binary forms, with or without
6+
modification, are permitted provided that the following conditions are met:
7+
8+
1. Redistributions of source code must retain the above copyright notice, this
9+
list of conditions and the following disclaimer.
10+
2. Redistributions in binary form must reproduce the above copyright notice,
11+
this list of conditions and the following disclaimer in the documentation
12+
and/or other materials provided with the distribution.
13+
14+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
15+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
18+
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
21+
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24+
*/
25+
26+
package org.sireum
27+
28+
import org.sireum.test._
29+
30+
/** Downloads a set of JSON sample files, checks that the Jackson-based parser and the
  * LLk-based parser build equal ASTs, and prints the time each takes over a fixed number
  * of iterations.
  */
class JacksonJsonParserBenchmarkTest extends TestSuite {

  // (local file name, download URL) for every benchmark input.
  val benchmarkFiles: ISZ[(String, String)] = ISZ(
    ("citm_catalog.json", "https://raw.githubusercontent.com/miloyip/nativejson-benchmark/master/data/citm_catalog.json"),
    ("twitter.json", "https://raw.githubusercontent.com/miloyip/nativejson-benchmark/master/data/twitter.json"),
    ("canada.json", "https://raw.githubusercontent.com/miloyip/nativejson-benchmark/master/data/canada.json"),
    ("mesh.json", "https://raw.githubusercontent.com/simdjson/simdjson-data/master/jsonexamples/mesh.json"),
    ("apache_builds.json", "https://raw.githubusercontent.com/simdjson/simdjson-data/master/jsonexamples/apache_builds.json"),
    ("instruments.json", "https://raw.githubusercontent.com/simdjson/simdjson-data/master/jsonexamples/instruments.json"),
    ("update-center.json", "https://raw.githubusercontent.com/simdjson/simdjson-data/master/jsonexamples/update-center.json"),
    ("numbers.json", "https://raw.githubusercontent.com/simdjson/simdjson-data/master/jsonexamples/numbers.json"),
    ("gsoc-2018.json", "https://raw.githubusercontent.com/simdjson/simdjson-data/master/jsonexamples/gsoc-2018.json")
  )

  // Untimed runs executed before measuring, and the number of timed runs per parser.
  val warmupIterations: Int = 3
  val benchmarkIterations: Int = 10

  /** Parses `content` with the Jackson-based parser (no source URI). */
  def parseJackson(content: String): parser.json.AST = {
    JacksonJsonParser.parse(None(), content)
  }

  /** Parses `content` with the LLk-based parser and builds its AST.
    *
    * @return `scala.None` when the parser yields no tree, reports an error, overflows
    *         the stack, or throws a non-fatal exception; otherwise the built AST
    */
  def parseLLkOpt(content: String): scala.Option[parser.json.AST] = {
    try {
      val reporter = message.Reporter.create
      parser.JsonParser.parse(None(), content, reporter) match {
        case Some(tree) if !reporter.hasError => scala.Some(parser.json.JsonAstBuilder(tree).build())
        case _ => scala.None
      }
    } catch {
      // StackOverflowError is not matched by NonFatal, so it is handled explicitly.
      case _: StackOverflowError => scala.None
      // Other fatal errors (out of memory, interruption, linkage problems) must propagate.
      case scala.util.control.NonFatal(_) => scala.None
    }
  }

  /** Runs `parse` `iterations` times and returns the total elapsed wall time in milliseconds. */
  private def elapsedMs(iterations: Int)(parse: => scala.Any): scala.Double = {
    val start = System.nanoTime()
    for (_ <- 0 until iterations) {
      parse
    }
    (System.nanoTime() - start) / 1000000.0
  }

  /** Benchmarks both parsers on one input and prints a one-line summary.
    *
    * When the LLk parser cannot handle the input, only Jackson is timed and the LLk
    * columns are printed as N/A. Otherwise both ASTs must be equal before timing starts.
    *
    * @param name    label printed for this input
    * @param content JSON text to parse
    */
  def benchmark(name: String, content: String): Unit = {
    val nameStr: Predef.String = name.value
    val sizeStr: Predef.String = content.size.toString

    parseLLkOpt(content) match {
      case scala.Some(llkAst) =>
        // Verify both produce the same AST
        val jackson = parseJackson(content)
        assert(jackson == llkAst, s"AST mismatch for ${name.value}")

        // Warmup
        for (_ <- 0 until warmupIterations) {
          parseJackson(content)
          parseLLkOpt(content)
        }

        val jacksonMs = elapsedMs(benchmarkIterations)(parseJackson(content))
        val llkMs = elapsedMs(benchmarkIterations)(parseLLkOpt(content))
        val ratio = llkMs / jacksonMs

        println(f" $nameStr%-25s size=$sizeStr%10s Jackson: ${jacksonMs}%10.2f ms LLk: ${llkMs}%10.2f ms ratio: ${ratio}%6.2fx")

      case _ =>
        // LLk cannot parse this file: warm up and benchmark Jackson only
        for (_ <- 0 until warmupIterations) {
          parseJackson(content)
        }
        val jacksonMs = elapsedMs(benchmarkIterations)(parseJackson(content))
        println(f" $nameStr%-25s size=$sizeStr%10s Jackson: ${jacksonMs}%10.2f ms LLk: N/A ratio: N/A")
    }
  }

  val tests = Tests {

    * - {
      val tmpDir = Os.tempDir()

      // Remove the downloaded files even when a download or an AST check fails.
      try {
        println()
        println(s"JSON Parser Benchmark ($benchmarkIterations iterations, $warmupIterations warmup)")
        println(s"${"=" * 110}")

        for (p <- benchmarkFiles) {
          val name = p._1
          val url = p._2
          val file = tmpDir / name
          if (!file.exists) {
            val ok = file.downloadFrom(url)
            assert(ok, s"Failed to download ${url.value}")
          }
          val content: String = file.read
          benchmark(name, content)
        }

        println(s"${"=" * 110}")
      } finally {
        tmpDir.removeAll()
      }
    }

  }
}

0 commit comments

Comments
 (0)