55import  java .math .BigInteger ;
66import  java .nio .charset .Charset ;
77import  java .nio .charset .StandardCharsets ;
8- import  java .util .*;
8+ import  java .util .ArrayList ;
9+ import  java .util .Arrays ;
10+ import  java .util .Stack ;
911
1012import  com .fasterxml .jackson .core .*;
1113import  com .fasterxml .jackson .core .base .ParserMinimalBase ;
1214import  com .fasterxml .jackson .core .io .IOContext ;
1315import  com .fasterxml .jackson .core .io .NumberInput ;
1416import  com .fasterxml .jackson .core .json .DupDetector ;
1517import  com .fasterxml .jackson .core .sym .ByteQuadsCanonicalizer ;
16- import  com .fasterxml .jackson .core .util .*;
18+ import  com .fasterxml .jackson .core .util .ByteArrayBuilder ;
19+ import  com .fasterxml .jackson .core .util .JacksonFeatureSet ;
20+ import  com .fasterxml .jackson .core .util .TextBuffer ;
1721
1822import  static  com .fasterxml .jackson .dataformat .cbor .CBORConstants .*;
1923
@@ -328,6 +332,11 @@ public int getFirstTag() {
328332     */ 
329333    protected  int  _typeByte ;
330334
335+     /** 
336+      * Write position within the current {@code _textBuffer} segment, used to hand that position between a text-decoding method and its ASCII fast-path helper ({@code _finishLongText} / {@code _finishLongTextAscii}, {@code _finishChunkedText} / {@code _finishChunkedTextAscii}) since they append to the same segment. 
337+      */ 
338+     protected  int  _sharedOutBufferPtr ;
339+ 
331340    /** 
332341     * Type to keep track of a list of string references. A depth is stored to know when to pop the 
333342     * references off the stack for nested namespaces. 
@@ -2289,10 +2298,9 @@ protected void _finishToken() throws IOException
22892298
22902299        if  ((available  >= len )
22912300                // if not, could we read? NOTE: we do not require it, just attempt to read 
2292-                     || ((_inputBuffer .length  >= len )
2293-                             && _tryToLoadToHaveAtLeast (len ))) {
2294-                 _finishShortText (len );
2295-                 return ;
2301+                 || _tryToLoadToHaveAtLeast (len )) {
2302+             _finishShortText (len );
2303+             return ;
22962304        }
22972305        // If not enough space, need handling similar to chunked 
22982306        _finishLongText (len );
@@ -2331,11 +2339,9 @@ protected String _finishTextToken(int ch) throws IOException
23312339        //    due to inputBuffer never being even close to that big). 
23322340
23332341        final  int  available  = _inputEnd  - _inputPtr ;
2334- 
23352342        if  ((available  >= len )
23362343            // if not, could we read? NOTE: we do not require it, just attempt to read 
2337-                 || ((_inputBuffer .length  >= len )
2338-                         && _tryToLoadToHaveAtLeast (len ))) {
2344+                 || _tryToLoadToHaveAtLeast (len )) {
23392345            return  _finishShortText (len );
23402346        }
23412347        // If not enough space, need handling similar to chunked 
@@ -2364,19 +2370,22 @@ private final String _finishShortText(int len) throws IOException
23642370
23652371        // Let's actually do a tight loop for ASCII first: 
23662372        final  int  end  = _inputPtr ;
2367- 
2368-         int   i ; 
2369-         while  (( i  = inputBuf [inPtr ]) >=  0 ) { 
2373+          int   i  =  0 ; 
2374+         while  ( inPtr  <  end  &&  i  >=  0 ) { 
2375+              i  = inputBuf [inPtr ++]; 
23702376            outBuf [outPtr ++] = (char ) i ;
2371-             if  (++inPtr  == end ) {
2372-                 String  str  = _textBuffer .setCurrentAndReturn (outPtr );
2373-                 if  (stringRefs  != null ) {
2374-                     stringRefs .stringRefs .add (str );
2375-                     _sharedString  = str ;
2376-                 }
2377-                 return  str ;
2377+         }
2378+         if  (inPtr  == end  && i  >= 0 ) {
2379+             String  str  = _textBuffer .setCurrentAndReturn (outPtr );
2380+             if  (stringRefs  != null ) {
2381+                 stringRefs .stringRefs .add (str );
2382+                 _sharedString  = str ;
23782383            }
2384+             return  str ;
23792385        }
2386+         // Correct extra increments 
2387+         outPtr  -= 1 ;
2388+         inPtr  -= 1 ;
23802389        final  int [] codes  = UTF8_UNIT_CODES ;
23812390        do  {
23822391            i  = inputBuf [inPtr ++] & 0xFF ;
@@ -2443,10 +2452,17 @@ private final String _finishShortText(int len) throws IOException
24432452
24442453    private  final  String  _finishLongText (int  len ) throws  IOException 
24452454    {
2446-         char [] outBuf  = _textBuffer .emptyAndGetCurrentSegment ();
2447-         int  outPtr  = 0 ;
2448-         final  int [] codes  = UTF8_UNIT_CODES ;
2455+         StringRefList  stringRefs  = null ;
2456+         if  (!_stringRefs .empty () &&
2457+                 shouldReferenceString (_stringRefs .peek ().stringRefs .size (), len )) {
2458+             stringRefs  = _stringRefs .peek ();
2459+         }
2460+         // First a tight loop for ASCII. 
2461+         len  = _finishLongTextAscii (len );
2462+         char [] outBuf  = _textBuffer .getBufferWithoutReset ();
2463+         int  outPtr  = _sharedOutBufferPtr ;
24492464        int  outEnd  = outBuf .length ;
2465+         final  int [] codes  = UTF8_UNIT_CODES ;
24502466
24512467        while  (--len  >= 0 ) {
24522468            int  c  = _nextByte () & 0xFF ;
@@ -2500,14 +2516,52 @@ private final String _finishLongText(int len) throws IOException
25002516            outBuf [outPtr ++] = (char ) c ;
25012517        }
25022518        String  str  = _textBuffer .setCurrentAndReturn (outPtr );
2503-         if  (!_stringRefs .empty () &&
2504-                 shouldReferenceString (_stringRefs .peek ().stringRefs .size (), len )) {
2505-             _stringRefs .peek ().stringRefs .add (str );
2519+         if  (stringRefs  != null ) {
2520+             stringRefs .stringRefs .add (str );
25062521            _sharedString  = str ;
25072522        }
25082523        return  str ;
25092524    }
25102525
2526+     /** 
2527+      * Consumes as many ascii chars as possible in a tight loop. Returns the amount of bytes remaining. 
2528+      */ 
2529+     private  final  int  _finishLongTextAscii (int  len ) throws  IOException 
2530+     {
2531+         char [] outBuf  = _textBuffer .emptyAndGetCurrentSegment ();
2532+         final  byte [] input  = _inputBuffer ;
2533+         _sharedOutBufferPtr  = 0 ;
2534+         while  (len  > 0 ) {
2535+             // load as much input as possible 
2536+             int  size  = Math .min (len , Math .min (outBuf .length , input .length ));
2537+             if  (!_tryToLoadToHaveAtLeast (size )) {
2538+                 _sharedOutBufferPtr  = 0 ;
2539+                 return  len ;
2540+             }
2541+             int  outEnd  = size ;
2542+             int  outPtr  = 0 ;
2543+             int  inPtr  = _inputPtr ;
2544+             int  i  = 0 ;
2545+             // Tight loop to copy into the output buffer, bail if a non-ascii char is found 
2546+             while  (outPtr  < outEnd  && i  >= 0 ) {
2547+                 i  = input [inPtr ++];
2548+                 outBuf [outPtr ++] = (char ) i ;
2549+             }
2550+             // Found a non-ascii char, correct pointers and return to the caller. 
2551+             if  (i  < 0 ) {
2552+                 _inputPtr  = inPtr  - 1 ;
2553+                 _sharedOutBufferPtr  = outPtr  - 1 ;
2554+                 return  len  - _sharedOutBufferPtr ;
2555+             }
2556+             _inputPtr  = inPtr ;
2557+             if  (outPtr  >= outBuf .length ) {
2558+                 outBuf  = _textBuffer .finishCurrentSegment ();
2559+             }
2560+             len  -= size ;
2561+         }
2562+         return  len ;
2563+     }
2564+ 
25112565    private  final  void  _finishChunkedText () throws  IOException 
25122566    {
25132567        char [] outBuf  = _textBuffer .emptyAndGetCurrentSegment ();
@@ -2532,7 +2586,6 @@ private final void _finishChunkedText() throws IOException
25322586                        }
25332587                        break ;
25342588                    }
2535-                     _chunkLeft  = len ;
25362589                    int  end  = _inputPtr  + len ;
25372590                    if  (end  <= _inputEnd ) { // all within buffer 
25382591                        _chunkLeft  = 0 ;
@@ -2541,19 +2594,22 @@ private final void _finishChunkedText() throws IOException
25412594                        _chunkLeft  = (end  - _inputEnd );
25422595                        _chunkEnd  = _inputEnd ;
25432596                    }
2544-                 }
2545-                 // besides of which just need to ensure there's content 
2546-                 if  (_inputPtr  >= _inputEnd ) { // end of buffer, but not necessarily chunk 
2547-                     loadMoreGuaranteed ();
2548-                     int  end  = _inputPtr  + _chunkLeft ;
2549-                     if  (end  <= _inputEnd ) { // all within buffer 
2550-                         _chunkLeft  = 0 ;
2551-                         _chunkEnd  = end ;
2552-                     } else  { // stretches beyond 
2553-                         _chunkLeft  = (end  - _inputEnd );
2554-                         _chunkEnd  = _inputEnd ;
2597+                     // start of a new chunk 
2598+                     // First a tight loop for ASCII. 
2599+                     _sharedOutBufferPtr  = outPtr ;
2600+                     if  (_finishChunkedTextAscii ()) {
2601+                         // chunk fully consumed, let's get the next one 
2602+                         outBuf  = _textBuffer .getBufferWithoutReset ();
2603+                         outPtr  = _sharedOutBufferPtr ;
2604+                         outEnd  = outBuf .length ;
2605+                         continue ;
25552606                    }
2607+                     outBuf  = _textBuffer .getBufferWithoutReset ();
2608+                     outEnd  = outBuf .length ;
2609+                     outPtr  = _sharedOutBufferPtr ;
25562610                }
2611+                 // besides of which just need to ensure there's content 
2612+                 _loadMoreForChunkIfNeeded ();
25572613            }
25582614            int  c  = input [_inputPtr ++] & 0xFF ;
25592615            int  code  = codes [c ];
@@ -2563,9 +2619,9 @@ private final void _finishChunkedText() throws IOException
25632619            }
25642620
25652621            switch  (code ) {
2566-             case  0 :
2567-                 break ;
2568-             case  1 : // 2-byte UTF 
2622+                  case  0 :
2623+                      break ;
2624+                  case  1 : // 2-byte UTF 
25692625                {
25702626                    int  d  = _nextChunkedByte ();
25712627                    if  ((d  & 0xC0 ) != 0x080 ) {
@@ -2574,24 +2630,24 @@ private final void _finishChunkedText() throws IOException
25742630                    c  = ((c  & 0x1F ) << 6 ) | (d  & 0x3F );
25752631                }
25762632                break ;
2577-             case  2 : // 3-byte UTF 
2578-                 c  = _decodeChunkedUTF8_3 (c );
2579-                 break ;
2580-             case  3 : // 4-byte UTF 
2581-                 c  = _decodeChunkedUTF8_4 (c );
2582-                 // Let's add first part right away: 
2583-                 if  (outPtr  >= outBuf .length ) {
2584-                     outBuf  = _textBuffer .finishCurrentSegment ();
2585-                     outPtr  = 0 ;
2586-                     outEnd  = outBuf .length ;
2587-                 }
2588-                 outBuf [outPtr ++] = (char ) (0xD800  | (c  >> 10 ));
2589-                 c  = 0xDC00  | (c  & 0x3FF );
2590-                 // And let the other char output down below 
2591-                 break ;
2592-             default :
2593-                 // Is this good enough error message? 
2594-                 _reportInvalidInitial (c );
2633+                  case  2 : // 3-byte UTF 
2634+                      c  = _decodeChunkedUTF8_3 (c );
2635+                      break ;
2636+                  case  3 : // 4-byte UTF 
2637+                      c  = _decodeChunkedUTF8_4 (c );
2638+                      // Let's add first part right away: 
2639+                      if  (outPtr  >= outBuf .length ) {
2640+                          outBuf  = _textBuffer .finishCurrentSegment ();
2641+                          outPtr  = 0 ;
2642+                          outEnd  = outBuf .length ;
2643+                      }
2644+                      outBuf [outPtr ++] = (char ) (0xD800  | (c  >> 10 ));
2645+                      c  = 0xDC00  | (c  & 0x3FF );
2646+                      // And let the other char output down below 
2647+                      break ;
2648+                  default :
2649+                      // Is this good enough error message? 
2650+                      _reportInvalidInitial (c );
25952651            }
25962652            // Need more room? 
25972653            if  (outPtr  >= outEnd ) {
@@ -2602,9 +2658,76 @@ private final void _finishChunkedText() throws IOException
26022658            // Ok, let's add char to output: 
26032659            outBuf [outPtr ++] = (char ) c ;
26042660        }
2661+ 
26052662        _textBuffer .setCurrentLength (outPtr );
26062663    }
26072664
2665+     /** 
2666+      * Reads in a tight loop ASCII text until a non-ASCII char is found. If any, then it returns false to signal the 
2667+      * caller that the chunk wasn't finished. The caller will keep adding to the _outBuf at the _outPtr position to 
2668+      * finish the current text buffer segment 
2669+      */ 
2670+     private  final  boolean  _finishChunkedTextAscii () throws  IOException 
2671+     {
2672+         final  byte [] input  = _inputBuffer ;
2673+         int  outPtr  = _sharedOutBufferPtr ;
2674+         char [] outBuf  = _textBuffer .getBufferWithoutReset ();
2675+         int  outEnd  = outBuf .length ;
2676+         while  (true ) {
2677+             // besides of which just need to ensure there's content 
2678+             _loadMoreForChunkIfNeeded ();
2679+ 
2680+             // Find the size of the loop 
2681+             int  inSize  =  _chunkEnd  - _inputPtr ;
2682+             int  outSize  = outEnd  - outPtr ;
2683+             int  inputPtr  = _inputPtr ;
2684+             int  inputPtrEnd  = _inputPtr  + Math .min (inSize , outSize );
2685+             int  i  = 0 ;
2686+             // loop with copying what we can. 
2687+             while  (inputPtr  < inputPtrEnd  && i  >= 0 ) {
2688+                 i  = input [inputPtr ++];
2689+                 char  val  = (char ) i ;
2690+                 outBuf [outPtr ++] = val ;
2691+             }
2692+             _inputPtr  = inputPtr ;
2693+ 
2694+             if  (i  < 0 ) {
2695+                 // Found a non-ascii char, correct pointers and return to the caller. 
2696+                 outPtr  -= 1 ;
2697+                 _inputPtr  -= 1 ;
2698+                 _sharedOutBufferPtr  = outPtr ;
2699+                 // return false to signal this to the calling code to allow the multi-byte code-path to kick. 
2700+                 return  false ;
2701+             }
2702+             // Need more room? 
2703+             if  (outPtr  >= outEnd ) {
2704+                 outBuf  = _textBuffer .finishCurrentSegment ();
2705+                 outPtr  = 0 ;
2706+                 outEnd  = outBuf .length ;
2707+             }
2708+             if  (_inputPtr  < _chunkEnd  || _chunkLeft  > 0 ) {
2709+                 continue ;
2710+             }
2711+             _sharedOutBufferPtr  = outPtr ;
2712+             return  true ;
2713+         }
2714+     }
2715+ 
2716+     private  final  void  _loadMoreForChunkIfNeeded () throws  IOException 
2717+     {
2718+         if  (_inputPtr  >= _inputEnd ) { // end of buffer, but not necessarily chunk 
2719+             loadMoreGuaranteed ();
2720+             int  end  = _inputPtr  + _chunkLeft ;
2721+             if  (end  <= _inputEnd ) { // all within buffer 
2722+                 _chunkLeft  = 0 ;
2723+                 _chunkEnd  = end ;
2724+             } else  { // stretches beyond 
2725+                 _chunkLeft  = (end  - _inputEnd );
2726+                 _chunkEnd  = _inputEnd ;
2727+             }
2728+         }
2729+     }
2730+ 
26082731    private  final  int  _nextByte () throws  IOException  {
26092732        int  inPtr  = _inputPtr ;
26102733        if  (inPtr  < _inputEnd ) {
@@ -3716,6 +3839,10 @@ protected final boolean _tryToLoadToHaveAtLeast(int minAvailable) throws IOExcep
37163839        if  (_inputStream  == null ) {
37173840            return  false ;
37183841        }
3842+         // The code below assumes this is true, so we check it here. 
3843+         if  (_inputBuffer .length  < minAvailable )  {
3844+             return  false ;
3845+         }
37193846        // Need to move remaining data in front? 
37203847        int  amount  = _inputEnd  - _inputPtr ;
37213848        if  (amount  > 0  && _inputPtr  > 0 ) {
0 commit comments