Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions src/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,47 @@ describe.each(implementations)('jsonrepair [$name]', ({ jsonrepair }) => {
expect(jsonrepair('["," 2')).toBe('[",", 2]')
})

test('should escape unescaped double quotes in strings (issues #129, #144, #114, #151)', () => {
// Issue #144 - quotes followed by parentheses or another quote
expect(jsonrepair('{ "height": "53"" }')).toBe('{ "height": "53\\"" }')
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also add a test case for #114, for strings like a 40" television?

expect(jsonrepair('{ "height": "(5\'3")" }')).toBe('{ "height": "(5\'3\\")" }')
expect(jsonrepair('{"a": "test")" }')).toBe('{"a": "test\\")" }')
expect(jsonrepair('{"value": "foo(bar")"}')).toBe('{"value": "foo(bar\\")"}')

// Issue #129 - quotes followed by comma
expect(jsonrepair('{"a": "x "y", z"}')).toBe('{"a": "x \\"y\\", z"}')
expect(
jsonrepair('{"key": "become an "Airbnb-free zone", which is a political decision."}')
).toBe('{"key": "become an \\"Airbnb-free zone\\", which is a political decision."}')
expect(jsonrepair('{"key": "test "quoted", more text"}')).toBe(
'{"key": "test \\"quoted\\", more text"}'
)

// Issue #114 - unescaped quotes in measurement units like 65"
expect(jsonrepair('{"text": "I want to buy 65" television"}')).toBe(
'{"text": "I want to buy 65\\" television"}'
)
expect(jsonrepair('{"text": "a 40" TV"}')).toBe('{"text": "a 40\\" TV"}')
expect(jsonrepair('{"size": "12" x 15""}')).toBe('{"size": "12\\" x 15\\""}')

// Issue #151 - quotes followed by slash
expect(jsonrepair('{"value": "This is test "message/stream"}')).toBe(
'{"value": "This is test \\"message/stream"}'
)
expect(jsonrepair('{"name":"Parth","value":"This is test "message/stream"}')).toBe(
'{"name":"Parth","value":"This is test \\"message/stream"}'
)
expect(jsonrepair('{"path": "home/user"test/file"}')).toBe(
'{"path": "home/user\\"test/file"}'
)

// Quotes followed by letters (general case)
expect(jsonrepair('{"text": "hello "world today"}')).toBe('{"text": "hello \\"world today"}')

// Ensure normal cases still work
expect(jsonrepair('{"a": "x","b": "y"}')).toBe('{"a": "x","b": "y"}')
})

test('should replace special white space characters', () => {
expect(jsonrepair('{"a":\u00a0"foo\u00a0bar"}')).toBe('{"a": "foo\u00a0bar"}')
expect(jsonrepair('{"a":\u202F"foo"}')).toBe('{"a": "foo"}')
Expand Down
139 changes: 139 additions & 0 deletions src/regular/jsonrepair.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import {
regexUrlChar,
regexUrlStart,
removeAtIndex,
skipWhitespaceAtIndex,
isValidValueEndFollower,
stripLastOccurrence
} from '../utils/stringUtils.js'

Expand Down Expand Up @@ -519,6 +521,27 @@ export function jsonrepair(text: string): string {
) {
// The quote is followed by the end of the text, a delimiter,
// or a next value. So the quote is indeed the end of the string.

// Unified lookahead check for unescaped quotes (fixes #129, #144, #114, #151)
// Check if the quote is actually an unescaped quote inside the string
// by looking ahead to see if there's a "real" end quote followed by valid JSON delimiters
//
// We only need to do lookahead when the character immediately after the quote
// is "suspicious" - i.e., not a standard JSON delimiter but something that
// could indicate this quote is actually inside the string content
const needsLookahead = isUnescapedQuoteSuspicious(iQuote + 1)

if (needsLookahead) {
const validEndQuoteIndex = findNextValidEndQuote(iQuote + 1)
if (validEndQuoteIndex !== -1) {
// Found a valid end quote further ahead, so this quote is unescaped
output = output.substring(0, oBefore)
i = iQuote + 1
str = `${str.substring(0, oQuote)}\\${str.substring(oQuote)}`
continue
}
}

parseConcatenatedString()

return true
Expand Down Expand Up @@ -864,6 +887,122 @@ export function jsonrepair(text: string): string {
return prev
}

/**
 * Decide whether the quote that was just read might be an unescaped quote
 * that belongs to the string content instead of being the closing quote.
 *
 * The verdict is based purely on the input that follows the quote:
 * - end of text, whitespace, `}`, `]`, `:` or `+`: looks like a real end quote
 * - `,`: depends on what comes after the comma (next value, comment,
 *   unquoted key, or plain text)
 * - anything else: suspicious
 *
 * @param afterQuoteIndex - Index of the first character after the quote
 * @returns true when the quote is suspicious, false when it looks like a valid end quote
 */
function isUnescapedQuoteSuspicious(afterQuoteIndex: number): boolean {
  const next = text[afterQuoteIndex]

  // Nothing at all, or whitespace, directly after the quote: treat as a normal end quote
  if (next === undefined || isWhitespace(text, afterQuoteIndex)) {
    return false
  }

  // Closing brackets, the key/value separator and the string concatenation
  // operator are all characters that regularly follow a closed string
  if (next === '}' || next === ']' || next === ':' || next === '+') {
    return false
  }

  // Everything but a comma is suspicious right away
  if (next !== ',') {
    return true
  }

  // Comma: inspect the first non-whitespace character behind it
  const valueStart = skipWhitespaceAtIndex(text, afterQuoteIndex + 1)
  const afterComma = text[valueStart]

  // End of text, a closing bracket, or the start of a non-identifier value.
  // Identifiers are handled separately below since they can be either an
  // unquoted key or a continuation of the string content.
  const startsNextItem =
    afterComma === undefined ||
    ['}', ']', '-', '{', '['].includes(afterComma) ||
    isQuote(afterComma) ||
    isDigit(afterComma)
  if (startsNextItem) {
    return false
  }

  // A comment (/* or //) directly after the comma
  const startsComment =
    afterComma === '/' && (text[valueStart + 1] === '*' || text[valueStart + 1] === '/')
  if (startsComment) {
    return false
  }

  if (isFunctionNameCharStart(afterComma)) {
    // Walk to the end of the identifier
    let identifierEnd = valueStart
    while (identifierEnd < text.length && isFunctionNameChar(text[identifierEnd])) {
      identifierEnd++
    }

    const afterIdentifier = skipWhitespaceAtIndex(text, identifierEnd)

    // identifier followed by ':' is an unquoted key, e.g. {a:'foo',b:'bar'}
    if (text[afterIdentifier] === ':') {
      return false
    }

    // identifier, stray quote, then ':' is a key missing its start quote,
    // e.g. {"a":"foo",b":"bar"}
    if (isQuote(text[afterIdentifier])) {
      const afterStrayQuote = skipWhitespaceAtIndex(text, afterIdentifier + 1)
      if (text[afterStrayQuote] === ':') {
        return false
      }
    }
  }

  // Identifier that is not a key, or any other character after the comma:
  // most likely the string simply continues
  return true
}

/**
 * Scan forward for a quote that can serve as the real end quote of a string
 * value: a quote whose first non-whitespace follower is accepted by
 * isValidValueEndFollower (presumably a closing brace, closing bracket, comma,
 * or end of text; verify against the helper).
 *
 * A quote followed by ':' is deliberately not accepted: that pattern belongs
 * to a key, not a value. This matters for input like {"a":"foo",b":"bar"}
 * where b" is a key that lost its start quote.
 *
 * Finding such a quote further ahead is the signal that an earlier quote was
 * an unescaped quote inside the string (issues #129, #144, #114, #151).
 *
 * @param startIndex - Index at which the scan starts
 * @returns Index of the first matching quote, or -1 when there is none
 */
function findNextValidEndQuote(startIndex: number): number {
  for (let candidate = startIndex; candidate < text.length; candidate++) {
    if (!isQuote(text[candidate])) {
      continue
    }

    // Look at the first character behind the quote, ignoring whitespace
    const follower = text[skipWhitespaceAtIndex(text, candidate + 1)]
    if (isValidValueEndFollower(follower)) {
      return candidate
    }
  }

  // Ran off the end of the text without finding a usable end quote
  return -1
}

/**
 * Test whether the current position `i` marks the end of a number: either the
 * text is exhausted, or the current character is a delimiter or whitespace.
 */
function atEndOfNumber() {
  if (i >= text.length) {
    return true
  }

  return isDelimiter(text[i]) || isWhitespace(text, i)
}
Expand Down
156 changes: 156 additions & 0 deletions src/streaming/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -710,6 +710,27 @@ export function jsonrepairCore({
) {
// The quote is followed by the end of the text, a delimiter, or a next value
// so the quote is indeed the end of the string

// Unified lookahead check for unescaped quotes (fixes #129, #144, #114, #151)
// Check if the quote is actually an unescaped quote inside the string
// by looking ahead to see if there's a "real" end quote followed by valid JSON delimiters
//
// We only need to do lookahead when the character immediately after the quote
// is "suspicious" - i.e., not a standard JSON delimiter but something that
// could indicate this quote is actually inside the string content
const needsLookahead = isUnescapedQuoteSuspicious(iQuote + 1)

if (needsLookahead) {
const validEndQuoteIndex = findNextValidEndQuote(iQuote + 1)
if (validEndQuoteIndex !== -1) {
// Found a valid end quote further ahead, so this quote is unescaped
output.remove(oQuote + 1)
i = iQuote + 1
output.insertAt(oQuote, '\\')
continue
}
}

parseConcatenatedString()

return stack.update(Caret.afterValue)
Expand Down Expand Up @@ -1004,6 +1025,141 @@ export function jsonrepairCore({
return prev
}

/**
 * Decide whether the quote that was just read might be an unescaped quote
 * that belongs to the string content instead of being the closing quote.
 *
 * The verdict is based purely on the input that follows the quote:
 * - end of input, whitespace, `}`, `]`, `:` or `+`: looks like a real end quote
 * - `,`: depends on what comes after the comma (next value, comment,
 *   unquoted key, or plain text)
 * - anything else: suspicious
 *
 * @param afterQuoteIndex - Index of the first character after the quote
 * @returns true when the quote is suspicious, false when it looks like a valid end quote
 */
function isUnescapedQuoteSuspicious(afterQuoteIndex: number): boolean {
  // Move an index forward over whitespace, stopping at the end of the input
  const skipWhitespace = (from: number): number => {
    let pos = from
    while (!input.isEnd(pos) && isWhitespace(input, pos)) {
      pos++
    }
    return pos
  }

  const next = input.charAt(afterQuoteIndex)

  // Nothing at all, or whitespace, directly after the quote: treat as a normal end quote
  if (next === '' || isWhitespace(input, afterQuoteIndex)) {
    return false
  }

  // Closing brackets, the key/value separator and the string concatenation
  // operator are all characters that regularly follow a closed string
  if (next === '}' || next === ']' || next === ':' || next === '+') {
    return false
  }

  // Everything but a comma is suspicious right away
  if (next !== ',') {
    return true
  }

  // Comma: inspect the first non-whitespace character behind it
  const valueStart = skipWhitespace(afterQuoteIndex + 1)
  const afterComma = input.charAt(valueStart)

  // End of input, a closing bracket, or the start of a non-identifier value.
  // Identifiers are handled separately below since they can be either an
  // unquoted key or a continuation of the string content.
  const startsNextItem =
    afterComma === '' ||
    ['}', ']', '-', '{', '['].includes(afterComma) ||
    isQuote(afterComma) ||
    isDigit(afterComma)
  if (startsNextItem) {
    return false
  }

  // A comment (/* or //) directly after the comma
  if (afterComma === '/') {
    const second = input.charAt(valueStart + 1)
    if (second === '*' || second === '/') {
      return false
    }
  }

  if (isFunctionNameCharStart(afterComma)) {
    // Walk to the end of the identifier
    let identifierEnd = valueStart
    while (!input.isEnd(identifierEnd) && isFunctionNameChar(input.charAt(identifierEnd))) {
      identifierEnd++
    }

    const afterIdentifier = skipWhitespace(identifierEnd)
    const follower = input.charAt(afterIdentifier)

    // identifier followed by ':' is an unquoted key, e.g. {a:'foo',b:'bar'}
    if (follower === ':') {
      return false
    }

    // identifier, stray quote, then ':' is a key missing its start quote,
    // e.g. {"a":"foo",b":"bar"}
    if (isQuote(follower)) {
      const afterStrayQuote = skipWhitespace(afterIdentifier + 1)
      if (input.charAt(afterStrayQuote) === ':') {
        return false
      }
    }
  }

  // Identifier that is not a key, or any other character after the comma:
  // most likely the string simply continues
  return true
}

/**
 * Scan forward for a quote that can serve as the real end quote of a string
 * value: a quote whose first non-whitespace follower is a closing brace, a
 * closing bracket, a comma, or the end of the input.
 *
 * A quote followed by ':' is deliberately not accepted: that pattern belongs
 * to a key, not a value. This matters for input like {"a":"foo",b":"bar"}
 * where b" is a key that lost its start quote.
 *
 * Finding such a quote further ahead is the signal that an earlier quote was
 * an unescaped quote inside the string (issues #129, #144, #114, #151).
 *
 * @param startIndex - Index at which the scan starts
 * @returns Index of the first matching quote, or -1 when there is none
 */
function findNextValidEndQuote(startIndex: number): number {
  for (let candidate = startIndex; !input.isEnd(candidate); candidate++) {
    if (!isQuote(input.charAt(candidate))) {
      continue
    }

    // Step over whitespace behind the quote
    let after = candidate + 1
    while (!input.isEnd(after) && isWhitespace(input, after)) {
      after++
    }

    // Input ends right after the quote: it closes the value
    if (input.isEnd(after)) {
      return candidate
    }

    // '}', ']' and ',' may follow a value; ':' is not in this list on purpose
    const follower = input.charAt(after)
    if (follower === '}' || follower === ']' || follower === ',') {
      return candidate
    }
  }

  // Reached the end of the input without finding a usable end quote
  return -1
}

/**
 * Test whether the current position `i` marks the end of a number: either the
 * input is exhausted, or the current character is a delimiter or whitespace.
 */
function atEndOfNumber() {
  if (input.isEnd(i)) {
    return true
  }

  return isDelimiter(input.charAt(i)) || isWhitespace(input, i)
}
Expand Down
Loading