55import  java .math .BigInteger ;
66import  java .nio .charset .Charset ;
77import  java .nio .charset .StandardCharsets ;
8- import  java .util .*;
8+ import  java .util .ArrayList ;
9+ import  java .util .Arrays ;
10+ import  java .util .Stack ;
911
1012import  com .fasterxml .jackson .core .*;
1113import  com .fasterxml .jackson .core .base .ParserMinimalBase ;
1214import  com .fasterxml .jackson .core .io .IOContext ;
1315import  com .fasterxml .jackson .core .io .NumberInput ;
1416import  com .fasterxml .jackson .core .json .DupDetector ;
1517import  com .fasterxml .jackson .core .sym .ByteQuadsCanonicalizer ;
16- import  com .fasterxml .jackson .core .util .*;
18+ import  com .fasterxml .jackson .core .util .ByteArrayBuilder ;
19+ import  com .fasterxml .jackson .core .util .JacksonFeatureSet ;
20+ import  com .fasterxml .jackson .core .util .TextBuffer ;
1721
1822import  static  com .fasterxml .jackson .dataformat .cbor .CBORConstants .*;
1923
@@ -2289,10 +2293,9 @@ protected void _finishToken() throws IOException
22892293
22902294        if  ((available  >= len )
22912295                // if not, could we read? NOTE: we do not require it, just attempt to read 
2292-                     || ((_inputBuffer .length  >= len )
2293-                             && _tryToLoadToHaveAtLeast (len ))) {
2294-                 _finishShortText (len );
2295-                 return ;
2296+                 || _tryToLoadToHaveAtLeast (len )) {
2297+             _finishShortText (len );
2298+             return ;
22962299        }
22972300        // If not enough space, need handling similar to chunked 
22982301        _finishLongText (len );
@@ -2331,11 +2334,9 @@ protected String _finishTextToken(int ch) throws IOException
23312334        //    due to inputBuffer never being even close to that big). 
23322335
23332336        final  int  available  = _inputEnd  - _inputPtr ;
2334- 
23352337        if  ((available  >= len )
23362338            // if not, could we read? NOTE: we do not require it, just attempt to read 
2337-                 || ((_inputBuffer .length  >= len )
2338-                         && _tryToLoadToHaveAtLeast (len ))) {
2339+                 || _tryToLoadToHaveAtLeast (len )) {
23392340            return  _finishShortText (len );
23402341        }
23412342        // If not enough space, need handling similar to chunked 
@@ -2364,19 +2365,22 @@ private final String _finishShortText(int len) throws IOException
23642365
23652366        // Let's actually do a tight loop for ASCII first: 
23662367        final  int  end  = _inputPtr ;
2367- 
2368-         int   i ; 
2369-         while  (( i  = inputBuf [inPtr ]) >=  0 ) { 
2368+          int   i  =  0 ; 
2369+         while  ( inPtr  <  end  &&  i  >=  0 ) { 
2370+              i  = inputBuf [inPtr ++]; 
23702371            outBuf [outPtr ++] = (char ) i ;
2371-             if  (++inPtr  == end ) {
2372-                 String  str  = _textBuffer .setCurrentAndReturn (outPtr );
2373-                 if  (stringRefs  != null ) {
2374-                     stringRefs .stringRefs .add (str );
2375-                     _sharedString  = str ;
2376-                 }
2377-                 return  str ;
2372+         }
2373+         if  (inPtr  == end  && i  >= 0 ) {
2374+             String  str  = _textBuffer .setCurrentAndReturn (outPtr );
2375+             if  (stringRefs  != null ) {
2376+                 stringRefs .stringRefs .add (str );
2377+                 _sharedString  = str ;
23782378            }
2379+             return  str ;
23792380        }
2381+         // Correct extra increments 
2382+         outPtr  -= 1 ;
2383+         inPtr  -= 1 ;
23802384        final  int [] codes  = UTF8_UNIT_CODES ;
23812385        do  {
23822386            i  = inputBuf [inPtr ++] & 0xFF ;
@@ -2443,10 +2447,17 @@ private final String _finishShortText(int len) throws IOException
24432447
24442448    private  final  String  _finishLongText (int  len ) throws  IOException 
24452449    {
2446-         char [] outBuf  = _textBuffer .emptyAndGetCurrentSegment ();
2447-         int  outPtr  = 0 ;
2448-         final  int [] codes  = UTF8_UNIT_CODES ;
2450+         StringRefList  stringRefs  = null ;
2451+         if  (!_stringRefs .empty () &&
2452+                 shouldReferenceString (_stringRefs .peek ().stringRefs .size (), len )) {
2453+             stringRefs  = _stringRefs .peek ();
2454+         }
2455+         // First a tight loop for ASCII. 
2456+         len  = _finishLongTextAscii (len );
2457+         char [] outBuf  = _textBuffer .getBufferWithoutReset ();
2458+         int  outPtr  = _textBuffer .getCurrentSegmentSize ();
24492459        int  outEnd  = outBuf .length ;
2460+         final  int [] codes  = UTF8_UNIT_CODES ;
24502461
24512462        while  (--len  >= 0 ) {
24522463            int  c  = _nextByte () & 0xFF ;
@@ -2500,14 +2511,51 @@ private final String _finishLongText(int len) throws IOException
25002511            outBuf [outPtr ++] = (char ) c ;
25012512        }
25022513        String  str  = _textBuffer .setCurrentAndReturn (outPtr );
2503-         if  (!_stringRefs .empty () &&
2504-                 shouldReferenceString (_stringRefs .peek ().stringRefs .size (), len )) {
2505-             _stringRefs .peek ().stringRefs .add (str );
2514+         if  (stringRefs  != null ) {
2515+             stringRefs .stringRefs .add (str );
25062516            _sharedString  = str ;
25072517        }
25082518        return  str ;
25092519    }
25102520
2521+     /** 
2522+      * Consumes as many ascii chars as possible in a tight loop. Returns the amount of bytes remaining. 
2523+      */ 
2524+     private  final  int  _finishLongTextAscii (int  len ) throws  IOException 
2525+     {
2526+         char [] outBuf  = _textBuffer .emptyAndGetCurrentSegment ();
2527+         final  byte [] input  = _inputBuffer ;
2528+         while  (len  > 0 ) {
2529+             // load as much input as possible 
2530+             int  size  = Math .min (len , Math .min (outBuf .length , input .length ));
2531+             if  (!_tryToLoadToHaveAtLeast (size )) {
2532+                 return  len ;
2533+             }
2534+             int  outEnd  = size ;
2535+             int  outPtr  = 0 ;
2536+             int  inPtr  = _inputPtr ;
2537+             int  i  = 0 ;
2538+             // Tight loop to copy into the output buffer, bail if a non-ascii char is found 
2539+             while  (outPtr  < outEnd  && i  >= 0 ) {
2540+                 i  = input [inPtr ++];
2541+                 outBuf [outPtr ++] = (char ) i ;
2542+             }
2543+             // Found a non-ascii char, correct pointers and return to the caller. 
2544+             if  (i  < 0 ) {
2545+                 --outPtr ;
2546+                 _inputPtr  = inPtr  - 1 ;
2547+                 _textBuffer .setCurrentLength (outPtr );
2548+                 return  len  - outPtr ;
2549+             }
2550+             _inputPtr  = inPtr ;
2551+             if  (outPtr  >= outBuf .length ) {
2552+                 outBuf  = _textBuffer .finishCurrentSegment ();
2553+             }
2554+             len  -= size ;
2555+         }
2556+         return  len ;
2557+     }
2558+ 
25112559    private  final  void  _finishChunkedText () throws  IOException 
25122560    {
25132561        char [] outBuf  = _textBuffer .emptyAndGetCurrentSegment ();
@@ -2532,7 +2580,6 @@ private final void _finishChunkedText() throws IOException
25322580                        }
25332581                        break ;
25342582                    }
2535-                     _chunkLeft  = len ;
25362583                    int  end  = _inputPtr  + len ;
25372584                    if  (end  <= _inputEnd ) { // all within buffer 
25382585                        _chunkLeft  = 0 ;
@@ -2541,19 +2588,22 @@ private final void _finishChunkedText() throws IOException
25412588                        _chunkLeft  = (end  - _inputEnd );
25422589                        _chunkEnd  = _inputEnd ;
25432590                    }
2544-                 }
2545-                 // besides of which just need to ensure there's content 
2546-                 if  (_inputPtr  >= _inputEnd ) { // end of buffer, but not necessarily chunk 
2547-                     loadMoreGuaranteed ();
2548-                     int  end  = _inputPtr  + _chunkLeft ;
2549-                     if  (end  <= _inputEnd ) { // all within buffer 
2550-                         _chunkLeft  = 0 ;
2551-                         _chunkEnd  = end ;
2552-                     } else  { // stretches beyond 
2553-                         _chunkLeft  = (end  - _inputEnd );
2554-                         _chunkEnd  = _inputEnd ;
2591+                     // start of a new chunk 
2592+                     // First a tight loop for ASCII. 
2593+                     _textBuffer .setCurrentLength (outPtr );
2594+                     if  (_finishChunkedTextAscii ()) {
2595+                         // chunk fully consumed, let's get the next one 
2596+                         outBuf  = _textBuffer .getBufferWithoutReset ();
2597+                         outPtr  = _textBuffer .getCurrentSegmentSize ();
2598+                         outEnd  = outBuf .length ;
2599+                         continue ;
25552600                    }
2601+                     outBuf  = _textBuffer .getBufferWithoutReset ();
2602+                     outPtr  = _textBuffer .getCurrentSegmentSize ();
2603+                     outEnd  = outBuf .length ;
25562604                }
2605+                 // besides of which just need to ensure there's content 
2606+                 _loadMoreForChunkIfNeeded ();
25572607            }
25582608            int  c  = input [_inputPtr ++] & 0xFF ;
25592609            int  code  = codes [c ];
@@ -2563,9 +2613,9 @@ private final void _finishChunkedText() throws IOException
25632613            }
25642614
25652615            switch  (code ) {
2566-             case  0 :
2567-                 break ;
2568-             case  1 : // 2-byte UTF 
2616+                  case  0 :
2617+                      break ;
2618+                  case  1 : // 2-byte UTF 
25692619                {
25702620                    int  d  = _nextChunkedByte ();
25712621                    if  ((d  & 0xC0 ) != 0x080 ) {
@@ -2574,24 +2624,24 @@ private final void _finishChunkedText() throws IOException
25742624                    c  = ((c  & 0x1F ) << 6 ) | (d  & 0x3F );
25752625                }
25762626                break ;
2577-             case  2 : // 3-byte UTF 
2578-                 c  = _decodeChunkedUTF8_3 (c );
2579-                 break ;
2580-             case  3 : // 4-byte UTF 
2581-                 c  = _decodeChunkedUTF8_4 (c );
2582-                 // Let's add first part right away: 
2583-                 if  (outPtr  >= outBuf .length ) {
2584-                     outBuf  = _textBuffer .finishCurrentSegment ();
2585-                     outPtr  = 0 ;
2586-                     outEnd  = outBuf .length ;
2587-                 }
2588-                 outBuf [outPtr ++] = (char ) (0xD800  | (c  >> 10 ));
2589-                 c  = 0xDC00  | (c  & 0x3FF );
2590-                 // And let the other char output down below 
2591-                 break ;
2592-             default :
2593-                 // Is this good enough error message? 
2594-                 _reportInvalidInitial (c );
2627+                  case  2 : // 3-byte UTF 
2628+                      c  = _decodeChunkedUTF8_3 (c );
2629+                      break ;
2630+                  case  3 : // 4-byte UTF 
2631+                      c  = _decodeChunkedUTF8_4 (c );
2632+                      // Let's add first part right away: 
2633+                      if  (outPtr  >= outBuf .length ) {
2634+                          outBuf  = _textBuffer .finishCurrentSegment ();
2635+                          outPtr  = 0 ;
2636+                          outEnd  = outBuf .length ;
2637+                      }
2638+                      outBuf [outPtr ++] = (char ) (0xD800  | (c  >> 10 ));
2639+                      c  = 0xDC00  | (c  & 0x3FF );
2640+                      // And let the other char output down below 
2641+                      break ;
2642+                  default :
2643+                      // Is this good enough error message? 
2644+                      _reportInvalidInitial (c );
25952645            }
25962646            // Need more room? 
25972647            if  (outPtr  >= outEnd ) {
@@ -2602,9 +2652,75 @@ private final void _finishChunkedText() throws IOException
26022652            // Ok, let's add char to output: 
26032653            outBuf [outPtr ++] = (char ) c ;
26042654        }
2655+ 
26052656        _textBuffer .setCurrentLength (outPtr );
26062657    }
26072658
2659+     /** 
2660+      * Reads in a tight loop ASCII text until a non-ASCII char is found. If any, then it returns false to signal the 
2661+      * caller that the chunk wasn't finished. The caller will keep adding to the _outBuf at the _outPtr position to 
2662+      * finish the current text buffer segment 
2663+      */ 
2664+     private  final  boolean  _finishChunkedTextAscii () throws  IOException 
2665+     {
2666+         final  byte [] input  = _inputBuffer ;
2667+         int  outPtr  = _textBuffer .getCurrentSegmentSize ();
2668+         char [] outBuf  = _textBuffer .getBufferWithoutReset ();
2669+         int  outEnd  = outBuf .length ;
2670+         while  (true ) {
2671+             // besides of which just need to ensure there's content 
2672+             _loadMoreForChunkIfNeeded ();
2673+ 
2674+             // Find the size of the loop 
2675+             int  inSize  =  _chunkEnd  - _inputPtr ;
2676+             int  outSize  = outEnd  - outPtr ;
2677+             int  inputPtr  = _inputPtr ;
2678+             int  inputPtrEnd  = _inputPtr  + Math .min (inSize , outSize );
2679+             int  i  = 0 ;
2680+             // loop with copying what we can. 
2681+             while  (inputPtr  < inputPtrEnd  && i  >= 0 ) {
2682+                 i  = input [inputPtr ++];
2683+                 char  val  = (char ) i ;
2684+                 outBuf [outPtr ++] = val ;
2685+             }
2686+             _inputPtr  = inputPtr ;
2687+ 
2688+             if  (i  < 0 ) {
2689+                 // Found a non-ascii char, correct pointers and return to the caller. 
2690+                 _inputPtr  -= 1 ;
2691+                 _textBuffer .setCurrentLength (outPtr  - 1 );
2692+                 // return false to signal this to the calling code to allow the multi-byte code-path to kick. 
2693+                 return  false ;
2694+             }
2695+             // Need more room? 
2696+             if  (outPtr  >= outEnd ) {
2697+                 outBuf  = _textBuffer .finishCurrentSegment ();
2698+                 outPtr  = 0 ;
2699+                 outEnd  = outBuf .length ;
2700+             }
2701+             if  (_inputPtr  < _chunkEnd  || _chunkLeft  > 0 ) {
2702+                 continue ;
2703+             }
2704+             _textBuffer .setCurrentLength (outPtr );
2705+             return  true ;
2706+         }
2707+     }
2708+ 
2709+     private  final  void  _loadMoreForChunkIfNeeded () throws  IOException 
2710+     {
2711+         if  (_inputPtr  >= _inputEnd ) { // end of buffer, but not necessarily chunk 
2712+             loadMoreGuaranteed ();
2713+             int  end  = _inputPtr  + _chunkLeft ;
2714+             if  (end  <= _inputEnd ) { // all within buffer 
2715+                 _chunkLeft  = 0 ;
2716+                 _chunkEnd  = end ;
2717+             } else  { // stretches beyond 
2718+                 _chunkLeft  = (end  - _inputEnd );
2719+                 _chunkEnd  = _inputEnd ;
2720+             }
2721+         }
2722+     }
2723+ 
26082724    private  final  int  _nextByte () throws  IOException  {
26092725        int  inPtr  = _inputPtr ;
26102726        if  (inPtr  < _inputEnd ) {
@@ -3716,6 +3832,10 @@ protected final boolean _tryToLoadToHaveAtLeast(int minAvailable) throws IOExcep
37163832        if  (_inputStream  == null ) {
37173833            return  false ;
37183834        }
3835+         // The code below assumes this is true, so we check it here. 
3836+         if  (_inputBuffer .length  < minAvailable )  {
3837+             return  false ;
3838+         }
37193839        // Need to move remaining data in front? 
37203840        int  amount  = _inputEnd  - _inputPtr ;
37213841        if  (amount  > 0  && _inputPtr  > 0 ) {
0 commit comments