Skip to content

Commit fa153c7

Browse files
Copilot and extremeheat authored
Add support for single-quoted strings in mojangson parser (#53)
* Initial plan * Add support for single-quoted strings in mojangson parser Co-authored-by: extremeheat <[email protected]> --------- Co-authored-by: copilot-swe-agent[bot] <[email protected]> Co-authored-by: extremeheat <[email protected]>
1 parent 8526165 commit fa153c7

File tree

3 files changed

+100
-2
lines changed

3 files changed

+100
-2
lines changed

grammar.ne

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ JVALUE -> JOBJECT {% (d) => d[0] %}
66
| "'" _ JOBJECT _ "'" {% (d) => d[2] %}
77
| JARRAY {% (d) => d[0] %}
88
| STRING {% (d) => d[0] %}
9+
| SINGLE_QUOTED_STRING {% (d) => d[0] %}
910
| "null" {% (d) => null %}
1011

1112
JOBJECT -> "{" _ "}" {% (d) => { return { type: 'compound', value: {} } } %}
@@ -21,6 +22,8 @@ PAIR -> STRING _ ":" _ JVALUE {% (d) => [d[0].value, d[4]] %}
2122
# STRING has two alternatives:
#  1. A double-quoted string whose body is made of plain characters (anything
#     except a backslash or a double quote), single-character escapes
#     (\" \b \f \n \r \t \/ \\) and \uXXXX escapes. The matched pieces, quotes
#     included, are flattened and joined; a literal newline in a piece is
#     rewritten to the two-character escape \n so that JSON.parse accepts the
#     text, and the decoded result is handed to parseValue.
#  2. An unquoted run of one or more characters that are not a quote, '}',
#     ']', ':', ';', ',' or whitespace, joined and handed to parseValue.
STRING -> "\"" ( [^\\"] | "\\" ["bfnrt\/\\] | "\\u" [a-fA-F0-9] [a-fA-F0-9] [a-fA-F0-9] [a-fA-F0-9] ):* "\"" {% (d) => parseValue( JSON.parse(d.flat(3).map(b => b.replace('\n', '\\n')).join('')) ) %}
2223
| [^\"\'}\]:;,\s]:+ {% (d) => parseValue(d[0].join('')) %}
2324

25+
# A single-quoted string: an opening ', then zero or more of
#   - a plain character (anything except a backslash or a single quote),
#   - a backslash followed by one of " b f n r t / \ '
#   - \u followed by exactly four hexadecimal digits,
# then a closing '. The whole match array is passed to parseSingleQuoteString.
SINGLE_QUOTED_STRING -> "'" ( [^\\'] | "\\" ["bfnrt\/\\'] | "\\u" [a-fA-F0-9] [a-fA-F0-9] [a-fA-F0-9] [a-fA-F0-9] ):* "'" {% (d) => parseSingleQuoteString(d) %}
26+
2427
@{%
2528

2629
// Because of unquoted strings, parsing can be ambiguous.
@@ -48,6 +51,38 @@ function parseValue (str) {
4851
return { value: str, type: 'string' }
4952
}
5053

54+
/**
 * Post-processor for the SINGLE_QUOTED_STRING grammar rule.
 *
 * Joins the pieces matched between the two single quotes and decodes the
 * escape sequences in one left-to-right pass. The text is deliberately not
 * round-tripped through JSON.parse: re-quoting it as a JSON string corrupts an
 * already-escaped double quote (\") and fails on literal control characters
 * such as a raw newline, which this grammar accepts inside the quotes.
 *
 * @param {Array} d - Match data: [openingQuote, contentParts, closingQuote].
 *   Each content part is a string or a (possibly nested) array of strings.
 * @returns {{ value: string, type: 'string' }} The decoded string value.
 */
function parseSingleQuoteString(d) {
  // Single-character escapes and the character each one stands for.
  const simpleEscapes = new Map([
    ['b', '\b'],
    ['f', '\f'],
    ['n', '\n'],
    ['r', '\r'],
    ['t', '\t'],
    ['/', '/'],
    ['\\', '\\'],
    ['"', '"'],
    ["'", "'"]
  ])

  // d[1] contains the content between the quotes; rebuild the raw source text.
  const content = d[1] || []
  let raw = ''
  for (const part of content) {
    if (Array.isArray(part)) {
      raw += part.flat(Infinity).join('')
    } else if (part) {
      raw += part
    }
  }

  // Consuming each backslash together with what follows it keeps an escaped
  // backslash from being re-read as the start of another escape.
  const value = raw.replace(/\\(u[a-fA-F0-9]{4}|[\s\S])/g, (match, escape) => {
    if (escape.length === 5) {
      // \uXXXX: one UTF-16 code unit, matching what JSON.parse would produce.
      return String.fromCharCode(parseInt(escape.slice(1), 16))
    }
    // Unknown escapes are left untouched rather than guessed at.
    return simpleEscapes.has(escape) ? simpleEscapes.get(escape) : match
  })

  return { value, type: 'string' }
}
85+
5186
function extractPair(kv, output) {
5287
if (kv[0] !== undefined) {
5388
output[kv[0]] = kv[1]

index.js

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,13 +92,68 @@ function parse (text) {
9292
try {
9393
const parserNE = new nearley.Parser(nearley.Grammar.fromCompiled(grammar))
9494
parserNE.feed(text)
95-
return parserNE.results[0]
95+
// When there are multiple parse results (ambiguous grammar),
96+
// prefer results with more structured types (compound, list) over strings
97+
const results = parserNE.results
98+
if (results.length > 1) {
99+
// Score each result based on how "structured" it is
100+
const scored = results.map((r, i) => {
101+
const score = scoreResult(r)
102+
return { result: r, score }
103+
})
104+
// Sort by score descending (higher score = more structured)
105+
scored.sort((a, b) => b.score - a.score)
106+
return scored[0].result
107+
}
108+
return results[0]
96109
} catch (e) {
97110
e.message = `Error parsing text '${text}'`
98111
throw e
99112
}
100113
}
101114

115+
/**
 * Heuristic score for how "structured" a parse result is; used to choose
 * between alternative parses of the same text (higher wins).
 *
 * A node typed 'compound' or 'list' adds 10, a node typed 'string' subtracts 1,
 * and the scores of nested values are added on top.
 *
 * @param {*} obj - A parse result node, normally shaped { type, value }.
 * @returns {number} The accumulated score; 0 for anything that is not an object.
 */
function scoreResult (obj) {
  if (!obj || typeof obj !== 'object') return 0

  // Contribution of this node's own type.
  let total
  switch (obj.type) {
    case 'compound':
    case 'list':
      total = 10
      break
    case 'string':
      total = -1
      break
    default:
      total = 0
  }

  const inner = obj.value
  if (!inner || typeof inner !== 'object') return total

  // A bare array of nodes: score every element.
  if (Array.isArray(inner)) {
    for (const child of inner) total += scoreResult(child)
    return total
  }

  // List payload ({ value: [...] }): the entries are raw values rather than
  // { type, value } wrappers, so plain objects are scored as compounds and
  // plain strings as strings.
  if (inner.value && Array.isArray(inner.value)) {
    for (const entry of inner.value) {
      if (typeof entry === 'string') {
        total -= 1
      } else if (entry && typeof entry === 'object' && !Array.isArray(entry)) {
        total += 10
        for (const field of Object.values(entry)) total += scoreResult(field)
      }
    }
    return total
  }

  // Otherwise treat the value as a compound's property map.
  for (const child of Object.values(inner)) total += scoreResult(child)
  return total
}
156+
102157
module.exports = {
103158
parse,
104159
simplify,

test/test.js

Lines changed: 9 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)