@@ -111,74 +111,109 @@ protected Text _finishAndReturnText() throws IOException {
111111            return  null ;
112112        }
113113
114-         // Instead of tracking backslash positions, directly build the result 
115-         byte [] resultBuffer  = new  byte [max  - startPtr ]; // Pessimistic size 
116-         int  writePos  = ptr  - startPtr ;
114+         int [] escapePositions  = new  int [16 ]; // Small initial size 
117115
118-         // Copy everything before the first backslash 
119-         System .arraycopy (inputBuffer , startPtr , resultBuffer , 0 , writePos );
120- 
121-         stringLength  = writePos ;
116+         int  escapeCount  = 0 ;
117+         int  scanPtr  = ptr ;
122118
123-         while  (ptr  < max ) {
124-             byte  b  = inputBuffer [ptr ];
119+         // Scan to find escapes and end quote 
120+         while  (scanPtr  < max ) {
121+             byte  b  = inputBuffer [scanPtr ];
125122            if  (b  == INT_QUOTE ) {
126-                 // End of string 
127-                 stringEnd  = ptr  + 1 ;
128-                 // Create result with exact size 
129-                 if  (writePos  == resultBuffer .length ) {
130-                     return  new  Text (new  XContentString .UTF8Bytes (resultBuffer ), stringLength );
131-                 } else  {
132-                     byte [] exact  = new  byte [writePos ];
133-                     System .arraycopy (resultBuffer , 0 , exact , 0 , writePos );
134-                     return  new  Text (new  XContentString .UTF8Bytes (exact ), stringLength );
135-                 }
123+                 break ; // Found end 
136124            }
137125
138126            if  (b  == INT_BACKSLASH ) {
139-                 ptr ++;
140-                 if  (ptr  >= max ) {
127+                 // Grow array if needed 
128+                 if  (escapeCount  >= escapePositions .length ) {
129+                     int [] newArray  = new  int [escapePositions .length  * 2 ];
130+                     System .arraycopy (escapePositions , 0 , newArray , 0 , escapeCount );
131+                     escapePositions  = newArray ;
132+                 }
133+                 escapePositions [escapeCount ++] = scanPtr ;
134+ 
135+                 scanPtr ++;
136+                 if  (scanPtr  >= max ) {
141137                    return  null ;
142138                }
143-                 b  = inputBuffer [ptr ];
144-                 // Only handle simple escapes 
139+                 b  = inputBuffer [scanPtr ];
145140                if  (b  == '"'  || b  == '/'  || b  == '\\' ) {
146-                     resultBuffer [writePos ++] = b ;
147-                     ptr ++;
148-                     stringLength ++;
141+                     scanPtr ++;
142+                 } else  {
143+                     return  null ; // Unsupported escape 
144+                 }
145+             } else  if  (b  >= 0 ) {
146+                 scanPtr ++;
147+             } else  {
148+                 // Non-ASCII 
149+                 int  c  = b  & 0xFF ;
150+                 int  codeType  = INPUT_CODES_UTF8 [c ];
151+                 if  (codeType  == 0 ) {
152+                     scanPtr ++;
153+                 } else  if  (codeType  >= 2  && codeType  <= 4 ) {
154+                     if  (scanPtr  + codeType  > max ) {
155+                         return  null ;
156+                     }
157+                     scanPtr  += codeType ;
149158                } else  {
150-                     // Unsupported escape 
151159                    return  null ;
152160                }
161+             }
162+         }
163+ 
164+         if  (scanPtr  >= max ) {
165+             return  null ; // Didn't find closing quote 
166+         }
167+ 
168+         stringEnd  = scanPtr  + 1 ;
169+ 
170+         // Calculate exact byte size: total bytes minus number of backslashes 
171+         int  exactByteSize  = (scanPtr  - startPtr ) - escapeCount ;
172+ 
173+         // Allocate exact size buffer 
174+         byte [] resultBuffer  = new  byte [exactByteSize ];
175+         int  writePos  = 0 ;
176+ 
177+         // Copy everything before the first backslash 
178+         int  beforeEscapeLength  = ptr  - startPtr ;
179+         int  resultCharCount  = beforeEscapeLength ;
180+         System .arraycopy (inputBuffer , startPtr , resultBuffer , 0 , beforeEscapeLength );
181+         writePos  = beforeEscapeLength ;
182+ 
183+         // Second pass: process escapes 
184+         while  (ptr  < scanPtr ) {
185+             byte  b  = inputBuffer [ptr ];
186+ 
187+             if  (b  == INT_BACKSLASH ) {
188+                 ptr ++; // Skip backslash 
189+                 b  = inputBuffer [ptr ]; // Get escaped character 
190+                 resultBuffer [writePos ++] = b ;
191+                 resultCharCount ++;
192+                 ptr ++;
153193            } else  if  (b  >= 0 ) {
154194                // ASCII 
155195                resultBuffer [writePos ++] = b ;
196+                 resultCharCount ++;
156197                ptr ++;
157-                 stringLength ++;
158198            } else  {
159-                 // Non-ASCII 
199+                 // Non-ASCII - copy multi-byte sequence  
160200                int  c  = b  & 0xFF ;
161201                int  codeType  = INPUT_CODES_UTF8 [c ];
162202                if  (codeType  == 0 ) {
163203                    resultBuffer [writePos ++] = b ;
204+                     resultCharCount ++;
164205                    ptr ++;
165-                     stringLength ++;
166206                } else  if  (codeType  >= 2  && codeType  <= 4 ) {
167-                     if  (ptr  + codeType  > max ) {
168-                         return  null ;
169-                     }
170-                     // Copy multi-byte sequence 
171207                    System .arraycopy (inputBuffer , ptr , resultBuffer , writePos , codeType );
172208                    writePos  += codeType ;
209+                     resultCharCount ++;
173210                    ptr  += codeType ;
174-                     stringLength ++;
175-                 } else  {
176-                     return  null ;
177211                }
178212            }
179213        }
180214
181-         return  null ; // Didn't find closing quote 
215+         stringLength  = resultCharCount ;
216+         return  new  Text (new  XContentString .UTF8Bytes (resultBuffer ), stringLength );
182217    }
183218
184219    public  boolean  writeUTF8TextToStream (OutputStream  out ) throws  IOException  {
0 commit comments