Skip to content

Commit 25c4d6e

Browse files
committed
Change
1 parent 2a8dd7c commit 25c4d6e

File tree

1 file changed

+73
-38
lines changed

1 file changed

+73
-38
lines changed

libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java

Lines changed: 73 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -111,74 +111,109 @@ protected Text _finishAndReturnText() throws IOException {
111111
return null;
112112
}
113113

114-
// Instead of tracking backslash positions, directly build the result
115-
byte[] resultBuffer = new byte[max - startPtr]; // Pessimistic size
116-
int writePos = ptr - startPtr;
114+
int[] escapePositions = new int[16]; // Small initial size
117115

118-
// Copy everything before the first backslash
119-
System.arraycopy(inputBuffer, startPtr, resultBuffer, 0, writePos);
120-
121-
stringLength = writePos;
116+
int escapeCount = 0;
117+
int scanPtr = ptr;
122118

123-
while (ptr < max) {
124-
byte b = inputBuffer[ptr];
119+
// Scan to find escapes and end quote
120+
while (scanPtr < max) {
121+
byte b = inputBuffer[scanPtr];
125122
if (b == INT_QUOTE) {
126-
// End of string
127-
stringEnd = ptr + 1;
128-
// Create result with exact size
129-
if (writePos == resultBuffer.length) {
130-
return new Text(new XContentString.UTF8Bytes(resultBuffer), stringLength);
131-
} else {
132-
byte[] exact = new byte[writePos];
133-
System.arraycopy(resultBuffer, 0, exact, 0, writePos);
134-
return new Text(new XContentString.UTF8Bytes(exact), stringLength);
135-
}
123+
break; // Found end
136124
}
137125

138126
if (b == INT_BACKSLASH) {
139-
ptr++;
140-
if (ptr >= max) {
127+
// Grow array if needed
128+
if (escapeCount >= escapePositions.length) {
129+
int[] newArray = new int[escapePositions.length * 2];
130+
System.arraycopy(escapePositions, 0, newArray, 0, escapeCount);
131+
escapePositions = newArray;
132+
}
133+
escapePositions[escapeCount++] = scanPtr;
134+
135+
scanPtr++;
136+
if (scanPtr >= max) {
141137
return null;
142138
}
143-
b = inputBuffer[ptr];
144-
// Only handle simple escapes
139+
b = inputBuffer[scanPtr];
145140
if (b == '"' || b == '/' || b == '\\') {
146-
resultBuffer[writePos++] = b;
147-
ptr++;
148-
stringLength++;
141+
scanPtr++;
142+
} else {
143+
return null; // Unsupported escape
144+
}
145+
} else if (b >= 0) {
146+
scanPtr++;
147+
} else {
148+
// Non-ASCII
149+
int c = b & 0xFF;
150+
int codeType = INPUT_CODES_UTF8[c];
151+
if (codeType == 0) {
152+
scanPtr++;
153+
} else if (codeType >= 2 && codeType <= 4) {
154+
if (scanPtr + codeType > max) {
155+
return null;
156+
}
157+
scanPtr += codeType;
149158
} else {
150-
// Unsupported escape
151159
return null;
152160
}
161+
}
162+
}
163+
164+
if (scanPtr >= max) {
165+
return null; // Didn't find closing quote
166+
}
167+
168+
stringEnd = scanPtr + 1;
169+
170+
// Calculate exact byte size: total bytes minus number of backslashes
171+
int exactByteSize = (scanPtr - startPtr) - escapeCount;
172+
173+
// Allocate exact size buffer
174+
byte[] resultBuffer = new byte[exactByteSize];
175+
int writePos = 0;
176+
177+
// Copy everything before the first backslash
178+
int beforeEscapeLength = ptr - startPtr;
179+
int resultCharCount = beforeEscapeLength;
180+
System.arraycopy(inputBuffer, startPtr, resultBuffer, 0, beforeEscapeLength);
181+
writePos = beforeEscapeLength;
182+
183+
// Second pass: process escapes
184+
while (ptr < scanPtr) {
185+
byte b = inputBuffer[ptr];
186+
187+
if (b == INT_BACKSLASH) {
188+
ptr++; // Skip backslash
189+
b = inputBuffer[ptr]; // Get escaped character
190+
resultBuffer[writePos++] = b;
191+
resultCharCount++;
192+
ptr++;
153193
} else if (b >= 0) {
154194
// ASCII
155195
resultBuffer[writePos++] = b;
196+
resultCharCount++;
156197
ptr++;
157-
stringLength++;
158198
} else {
159-
// Non-ASCII
199+
// Non-ASCII - copy multi-byte sequence
160200
int c = b & 0xFF;
161201
int codeType = INPUT_CODES_UTF8[c];
162202
if (codeType == 0) {
163203
resultBuffer[writePos++] = b;
204+
resultCharCount++;
164205
ptr++;
165-
stringLength++;
166206
} else if (codeType >= 2 && codeType <= 4) {
167-
if (ptr + codeType > max) {
168-
return null;
169-
}
170-
// Copy multi-byte sequence
171207
System.arraycopy(inputBuffer, ptr, resultBuffer, writePos, codeType);
172208
writePos += codeType;
209+
resultCharCount++;
173210
ptr += codeType;
174-
stringLength++;
175-
} else {
176-
return null;
177211
}
178212
}
179213
}
180214

181-
return null; // Didn't find closing quote
215+
stringLength = resultCharCount;
216+
return new Text(new XContentString.UTF8Bytes(resultBuffer), stringLength);
182217
}
183218

184219
public boolean writeUTF8TextToStream(OutputStream out) throws IOException {

0 commit comments

Comments
 (0)