77import java .math .BigInteger ;
88import java .nio .charset .Charset ;
99import java .nio .charset .StandardCharsets ;
10- import java .util .*;
10+ import java .util .ArrayList ;
11+ import java .util .Arrays ;
12+ import java .util .Stack ;
1113
1214import tools .jackson .core .*;
1315import tools .jackson .core .base .ParserBase ;
@@ -2322,10 +2324,9 @@ protected void _finishToken() throws JacksonException
23222324
23232325 if ((available >= len )
23242326 // if not, could we read? NOTE: we do not require it, just attempt to read
2325- || ((_inputBuffer .length >= len )
2326- && _tryToLoadToHaveAtLeast (len ))) {
2327- _finishShortText (len );
2328- return ;
2327+ || _tryToLoadToHaveAtLeast (len )) {
2328+ _finishShortText (len );
2329+ return ;
23292330 }
23302331 // If not enough space, need handling similar to chunked
23312332 _finishLongText (len );
@@ -2361,11 +2362,9 @@ protected String _finishTextToken(int ch) throws JacksonException
23612362 // due to inputBuffer never being even close to that big).
23622363
23632364 final int available = _inputEnd - _inputPtr ;
2364-
23652365 if ((available >= len )
23662366 // if not, could we read? NOTE: we do not require it, just attempt to read
2367- || ((_inputBuffer .length >= len )
2368- && _tryToLoadToHaveAtLeast (len ))) {
2367+ || _tryToLoadToHaveAtLeast (len )) {
23692368 return _finishShortText (len );
23702369 }
23712370 // If not enough space, need handling similar to chunked
@@ -2394,19 +2393,22 @@ private final String _finishShortText(int len) throws JacksonException
23942393
23952394 // Let's actually do a tight loop for ASCII first:
23962395 final int end = _inputPtr ;
2397-
2398- int i ;
2399- while (( i = inputBuf [inPtr ]) >= 0 ) {
2396+ int i = 0 ;
2397+ while ( inPtr < end && i >= 0 ) {
2398+ i = inputBuf [inPtr ++];
24002399 outBuf [outPtr ++] = (char ) i ;
2401- if (++inPtr == end ) {
2402- String str = _textBuffer .setCurrentAndReturn (outPtr );
2403- if (stringRefs != null ) {
2404- stringRefs .stringRefs .add (str );
2405- _sharedString = str ;
2406- }
2407- return str ;
2400+ }
2401+ if (inPtr == end && i >= 0 ) {
2402+ String str = _textBuffer .setCurrentAndReturn (outPtr );
2403+ if (stringRefs != null ) {
2404+ stringRefs .stringRefs .add (str );
2405+ _sharedString = str ;
24082406 }
2407+ return str ;
24092408 }
2409+ // Correct extra increments
2410+ outPtr -= 1 ;
2411+ inPtr -= 1 ;
24102412 final int [] codes = UTF8_UNIT_CODES ;
24112413 do {
24122414 i = inputBuf [inPtr ++] & 0xFF ;
@@ -2473,10 +2475,17 @@ private final String _finishShortText(int len) throws JacksonException
24732475
24742476 private final String _finishLongText (int len ) throws JacksonException
24752477 {
2476- char [] outBuf = _textBuffer .emptyAndGetCurrentSegment ();
2477- int outPtr = 0 ;
2478- final int [] codes = UTF8_UNIT_CODES ;
2478+ StringRefList stringRefs = null ;
2479+ if (!_stringRefs .empty () &&
2480+ shouldReferenceString (_stringRefs .peek ().stringRefs .size (), len )) {
2481+ stringRefs = _stringRefs .peek ();
2482+ }
2483+ // First a tight loop for ASCII.
2484+ len = _finishLongTextAscii (len );
2485+ char [] outBuf = _textBuffer .getBufferWithoutReset ();
2486+ int outPtr = _textBuffer .getCurrentSegmentSize ();
24792487 int outEnd = outBuf .length ;
2488+ final int [] codes = UTF8_UNIT_CODES ;
24802489
24812490 while (--len >= 0 ) {
24822491 int c = _nextByte () & 0xFF ;
@@ -2530,14 +2539,51 @@ private final String _finishLongText(int len) throws JacksonException
25302539 outBuf [outPtr ++] = (char ) c ;
25312540 }
25322541 String str = _textBuffer .setCurrentAndReturn (outPtr );
2533- if (!_stringRefs .empty () &&
2534- shouldReferenceString (_stringRefs .peek ().stringRefs .size (), len )) {
2535- _stringRefs .peek ().stringRefs .add (str );
2542+ if (stringRefs != null ) {
2543+ stringRefs .stringRefs .add (str );
25362544 _sharedString = str ;
25372545 }
25382546 return str ;
25392547 }
25402548
2549+ /**
2550+ * Consumes as many ascii chars as possible in a tight loop. Returns the amount of bytes remaining.
2551+ */
2552+ private final int _finishLongTextAscii (int len ) throws JacksonException
2553+ {
2554+ char [] outBuf = _textBuffer .emptyAndGetCurrentSegment ();
2555+ final byte [] input = _inputBuffer ;
2556+ while (len > 0 ) {
2557+ // load as much input as possible
2558+ int size = Math .min (len , Math .min (outBuf .length , input .length ));
2559+ if (!_tryToLoadToHaveAtLeast (size )) {
2560+ return len ;
2561+ }
2562+ int outEnd = size ;
2563+ int outPtr = 0 ;
2564+ int inPtr = _inputPtr ;
2565+ int i = 0 ;
2566+ // Tight loop to copy into the output buffer, bail if a non-ascii char is found
2567+ while (outPtr < outEnd && i >= 0 ) {
2568+ i = input [inPtr ++];
2569+ outBuf [outPtr ++] = (char ) i ;
2570+ }
2571+ // Found a non-ascii char, correct pointers and return to the caller.
2572+ if (i < 0 ) {
2573+ --outPtr ;
2574+ _inputPtr = inPtr - 1 ;
2575+ _textBuffer .setCurrentLength (outPtr );
2576+ return len - outPtr ;
2577+ }
2578+ _inputPtr = inPtr ;
2579+ if (outPtr >= outBuf .length ) {
2580+ outBuf = _textBuffer .finishCurrentSegment ();
2581+ }
2582+ len -= size ;
2583+ }
2584+ return len ;
2585+ }
2586+
25412587 private final void _finishChunkedText () throws JacksonException
25422588 {
25432589 char [] outBuf = _textBuffer .emptyAndGetCurrentSegment ();
@@ -2562,7 +2608,6 @@ private final void _finishChunkedText() throws JacksonException
25622608 }
25632609 break ;
25642610 }
2565- _chunkLeft = len ;
25662611 int end = _inputPtr + len ;
25672612 if (end <= _inputEnd ) { // all within buffer
25682613 _chunkLeft = 0 ;
@@ -2571,19 +2616,22 @@ private final void _finishChunkedText() throws JacksonException
25712616 _chunkLeft = (end - _inputEnd );
25722617 _chunkEnd = _inputEnd ;
25732618 }
2574- }
2575- // besides of which just need to ensure there's content
2576- if (_inputPtr >= _inputEnd ) { // end of buffer, but not necessarily chunk
2577- loadMoreGuaranteed ();
2578- int end = _inputPtr + _chunkLeft ;
2579- if (end <= _inputEnd ) { // all within buffer
2580- _chunkLeft = 0 ;
2581- _chunkEnd = end ;
2582- } else { // stretches beyond
2583- _chunkLeft = (end - _inputEnd );
2584- _chunkEnd = _inputEnd ;
2619+ // start of a new chunk
2620+ // First a tight loop for ASCII.
2621+ _textBuffer .setCurrentLength (outPtr );
2622+ if (_finishChunkedTextAscii ()) {
2623+ // chunk fully consumed, let's get the next one
2624+ outBuf = _textBuffer .getBufferWithoutReset ();
2625+ outPtr = _textBuffer .getCurrentSegmentSize ();
2626+ outEnd = outBuf .length ;
2627+ continue ;
25852628 }
2629+ outBuf = _textBuffer .getBufferWithoutReset ();
2630+ outPtr = _textBuffer .getCurrentSegmentSize ();
2631+ outEnd = outBuf .length ;
25862632 }
2633+ // besides of which just need to ensure there's content
2634+ _loadMoreForChunkIfNeeded ();
25872635 }
25882636 int c = input [_inputPtr ++] & 0xFF ;
25892637 int code = codes [c ];
@@ -2593,9 +2641,9 @@ private final void _finishChunkedText() throws JacksonException
25932641 }
25942642
25952643 switch (code ) {
2596- case 0 :
2597- break ;
2598- case 1 : // 2-byte UTF
2644+ case 0 :
2645+ break ;
2646+ case 1 : // 2-byte UTF
25992647 {
26002648 int d = _nextChunkedByte ();
26012649 if ((d & 0xC0 ) != 0x080 ) {
@@ -2604,24 +2652,24 @@ private final void _finishChunkedText() throws JacksonException
26042652 c = ((c & 0x1F ) << 6 ) | (d & 0x3F );
26052653 }
26062654 break ;
2607- case 2 : // 3-byte UTF
2608- c = _decodeChunkedUTF8_3 (c );
2609- break ;
2610- case 3 : // 4-byte UTF
2611- c = _decodeChunkedUTF8_4 (c );
2612- // Let's add first part right away:
2613- if (outPtr >= outBuf .length ) {
2614- outBuf = _textBuffer .finishCurrentSegment ();
2615- outPtr = 0 ;
2616- outEnd = outBuf .length ;
2617- }
2618- outBuf [outPtr ++] = (char ) (0xD800 | (c >> 10 ));
2619- c = 0xDC00 | (c & 0x3FF );
2620- // And let the other char output down below
2621- break ;
2622- default :
2623- // Is this good enough error message?
2624- _reportInvalidInitial (c );
2655+ case 2 : // 3-byte UTF
2656+ c = _decodeChunkedUTF8_3 (c );
2657+ break ;
2658+ case 3 : // 4-byte UTF
2659+ c = _decodeChunkedUTF8_4 (c );
2660+ // Let's add first part right away:
2661+ if (outPtr >= outBuf .length ) {
2662+ outBuf = _textBuffer .finishCurrentSegment ();
2663+ outPtr = 0 ;
2664+ outEnd = outBuf .length ;
2665+ }
2666+ outBuf [outPtr ++] = (char ) (0xD800 | (c >> 10 ));
2667+ c = 0xDC00 | (c & 0x3FF );
2668+ // And let the other char output down below
2669+ break ;
2670+ default :
2671+ // Is this good enough error message?
2672+ _reportInvalidInitial (c );
26252673 }
26262674 // Need more room?
26272675 if (outPtr >= outEnd ) {
@@ -2632,9 +2680,75 @@ private final void _finishChunkedText() throws JacksonException
26322680 // Ok, let's add char to output:
26332681 outBuf [outPtr ++] = (char ) c ;
26342682 }
2683+
26352684 _textBuffer .setCurrentLength (outPtr );
26362685 }
26372686
2687+ /**
2688+ * Reads in a tight loop ASCII text until a non-ASCII char is found. If any, then it returns false to signal the
2689+ * caller that the chunk wasn't finished. The caller will keep adding to the _outBuf at the _outPtr position to
2690+ * finish the current text buffer segment
2691+ */
2692+ private final boolean _finishChunkedTextAscii () throws JacksonException
2693+ {
2694+ final byte [] input = _inputBuffer ;
2695+ int outPtr = _textBuffer .getCurrentSegmentSize ();
2696+ char [] outBuf = _textBuffer .getBufferWithoutReset ();
2697+ int outEnd = outBuf .length ;
2698+ while (true ) {
2699+ // besides of which just need to ensure there's content
2700+ _loadMoreForChunkIfNeeded ();
2701+
2702+ // Find the size of the loop
2703+ int inSize = _chunkEnd - _inputPtr ;
2704+ int outSize = outEnd - outPtr ;
2705+ int inputPtr = _inputPtr ;
2706+ int inputPtrEnd = _inputPtr + Math .min (inSize , outSize );
2707+ int i = 0 ;
2708+ // loop with copying what we can.
2709+ while (inputPtr < inputPtrEnd && i >= 0 ) {
2710+ i = input [inputPtr ++];
2711+ char val = (char ) i ;
2712+ outBuf [outPtr ++] = val ;
2713+ }
2714+ _inputPtr = inputPtr ;
2715+
2716+ if (i < 0 ) {
2717+ // Found a non-ascii char, correct pointers and return to the caller.
2718+ _inputPtr -= 1 ;
2719+ _textBuffer .setCurrentLength (outPtr - 1 );
2720+ // return false to signal this to the calling code to allow the multi-byte code-path to kick.
2721+ return false ;
2722+ }
2723+ // Need more room?
2724+ if (outPtr >= outEnd ) {
2725+ outBuf = _textBuffer .finishCurrentSegment ();
2726+ outPtr = 0 ;
2727+ outEnd = outBuf .length ;
2728+ }
2729+ if (_inputPtr < _chunkEnd || _chunkLeft > 0 ) {
2730+ continue ;
2731+ }
2732+ _textBuffer .setCurrentLength (outPtr );
2733+ return true ;
2734+ }
2735+ }
2736+
2737+ private final void _loadMoreForChunkIfNeeded () throws JacksonException
2738+ {
2739+ if (_inputPtr >= _inputEnd ) { // end of buffer, but not necessarily chunk
2740+ loadMoreGuaranteed ();
2741+ int end = _inputPtr + _chunkLeft ;
2742+ if (end <= _inputEnd ) { // all within buffer
2743+ _chunkLeft = 0 ;
2744+ _chunkEnd = end ;
2745+ } else { // stretches beyond
2746+ _chunkLeft = (end - _inputEnd );
2747+ _chunkEnd = _inputEnd ;
2748+ }
2749+ }
2750+ }
2751+
26382752 private final int _nextByte () throws JacksonException {
26392753 int inPtr = _inputPtr ;
26402754 if (inPtr < _inputEnd ) {
@@ -3758,6 +3872,10 @@ protected final boolean _tryToLoadToHaveAtLeast(int minAvailable) throws Jackson
37583872 if (_inputStream == null ) {
37593873 return false ;
37603874 }
3875+ // The code below assumes this is true, so we check it here.
3876+ if (_inputBuffer .length < minAvailable ) {
3877+ return false ;
3878+ }
37613879 // Need to move remaining data in front?
37623880 int amount = _inputEnd - _inputPtr ;
37633881 if (amount > 0 && _inputPtr > 0 ) {
0 commit comments