Skip to content

Commit 31149ef

Browse files
committed
Merge branch '2.19'
2 parents b8fa168 + 2e88cb6 commit 31149ef

File tree

6 files changed

+240
-60
lines changed

6 files changed

+240
-60
lines changed

.github/workflows/main.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,5 +63,5 @@ jobs:
6363
uses: codecov/codecov-action@0565863a31f2c772f9f0395002a31e3f06189574 # v5.4.0
6464
with:
6565
token: ${{ secrets.CODECOV_TOKEN }}
66-
file: ./target/site/jacoco/jacoco.xml
66+
files: ./target/site/jacoco/jacoco.xml
6767
flags: unittests

cbor/src/main/java/tools/jackson/dataformat/cbor/CBORParser.java

Lines changed: 175 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77
import java.math.BigInteger;
88
import java.nio.charset.Charset;
99
import java.nio.charset.StandardCharsets;
10-
import java.util.*;
10+
import java.util.ArrayList;
11+
import java.util.Arrays;
12+
import java.util.Stack;
1113

1214
import tools.jackson.core.*;
1315
import tools.jackson.core.base.ParserBase;
@@ -2322,10 +2324,9 @@ protected void _finishToken() throws JacksonException
23222324

23232325
if ((available >= len)
23242326
// if not, could we read? NOTE: we do not require it, just attempt to read
2325-
|| ((_inputBuffer.length >= len)
2326-
&& _tryToLoadToHaveAtLeast(len))) {
2327-
_finishShortText(len);
2328-
return;
2327+
|| _tryToLoadToHaveAtLeast(len)) {
2328+
_finishShortText(len);
2329+
return;
23292330
}
23302331
// If not enough space, need handling similar to chunked
23312332
_finishLongText(len);
@@ -2361,11 +2362,9 @@ protected String _finishTextToken(int ch) throws JacksonException
23612362
// due to inputBuffer never being even close to that big).
23622363

23632364
final int available = _inputEnd - _inputPtr;
2364-
23652365
if ((available >= len)
23662366
// if not, could we read? NOTE: we do not require it, just attempt to read
2367-
|| ((_inputBuffer.length >= len)
2368-
&& _tryToLoadToHaveAtLeast(len))) {
2367+
|| _tryToLoadToHaveAtLeast(len)) {
23692368
return _finishShortText(len);
23702369
}
23712370
// If not enough space, need handling similar to chunked
@@ -2394,19 +2393,22 @@ private final String _finishShortText(int len) throws JacksonException
23942393

23952394
// Let's actually do a tight loop for ASCII first:
23962395
final int end = _inputPtr;
2397-
2398-
int i;
2399-
while ((i = inputBuf[inPtr]) >= 0) {
2396+
int i = 0;
2397+
while (inPtr < end && i >= 0) {
2398+
i = inputBuf[inPtr++];
24002399
outBuf[outPtr++] = (char) i;
2401-
if (++inPtr == end) {
2402-
String str = _textBuffer.setCurrentAndReturn(outPtr);
2403-
if (stringRefs != null) {
2404-
stringRefs.stringRefs.add(str);
2405-
_sharedString = str;
2406-
}
2407-
return str;
2400+
}
2401+
if (inPtr == end && i >= 0) {
2402+
String str = _textBuffer.setCurrentAndReturn(outPtr);
2403+
if (stringRefs != null) {
2404+
stringRefs.stringRefs.add(str);
2405+
_sharedString = str;
24082406
}
2407+
return str;
24092408
}
2409+
// Correct extra increments
2410+
outPtr -= 1;
2411+
inPtr -= 1;
24102412
final int[] codes = UTF8_UNIT_CODES;
24112413
do {
24122414
i = inputBuf[inPtr++] & 0xFF;
@@ -2473,10 +2475,17 @@ private final String _finishShortText(int len) throws JacksonException
24732475

24742476
private final String _finishLongText(int len) throws JacksonException
24752477
{
2476-
char[] outBuf = _textBuffer.emptyAndGetCurrentSegment();
2477-
int outPtr = 0;
2478-
final int[] codes = UTF8_UNIT_CODES;
2478+
StringRefList stringRefs = null;
2479+
if (!_stringRefs.empty() &&
2480+
shouldReferenceString(_stringRefs.peek().stringRefs.size(), len)) {
2481+
stringRefs = _stringRefs.peek();
2482+
}
2483+
// First a tight loop for ASCII.
2484+
len = _finishLongTextAscii(len);
2485+
char[] outBuf = _textBuffer.getBufferWithoutReset();
2486+
int outPtr = _textBuffer.getCurrentSegmentSize();
24792487
int outEnd = outBuf.length;
2488+
final int[] codes = UTF8_UNIT_CODES;
24802489

24812490
while (--len >= 0) {
24822491
int c = _nextByte() & 0xFF;
@@ -2530,14 +2539,51 @@ private final String _finishLongText(int len) throws JacksonException
25302539
outBuf[outPtr++] = (char) c;
25312540
}
25322541
String str = _textBuffer.setCurrentAndReturn(outPtr);
2533-
if (!_stringRefs.empty() &&
2534-
shouldReferenceString(_stringRefs.peek().stringRefs.size(), len)) {
2535-
_stringRefs.peek().stringRefs.add(str);
2542+
if (stringRefs != null) {
2543+
stringRefs.stringRefs.add(str);
25362544
_sharedString = str;
25372545
}
25382546
return str;
25392547
}
25402548

2549+
/**
2550+
* Consumes as many ascii chars as possible in a tight loop. Returns the amount of bytes remaining.
2551+
*/
2552+
private final int _finishLongTextAscii(int len) throws JacksonException
2553+
{
2554+
char[] outBuf = _textBuffer.emptyAndGetCurrentSegment();
2555+
final byte[] input = _inputBuffer;
2556+
while (len > 0) {
2557+
// load as much input as possible
2558+
int size = Math.min(len, Math.min(outBuf.length, input.length));
2559+
if (!_tryToLoadToHaveAtLeast(size)) {
2560+
return len;
2561+
}
2562+
int outEnd = size;
2563+
int outPtr = 0;
2564+
int inPtr = _inputPtr;
2565+
int i = 0;
2566+
// Tight loop to copy into the output buffer, bail if a non-ascii char is found
2567+
while (outPtr < outEnd && i >= 0) {
2568+
i = input[inPtr++];
2569+
outBuf[outPtr++] = (char) i;
2570+
}
2571+
// Found a non-ascii char, correct pointers and return to the caller.
2572+
if (i < 0) {
2573+
--outPtr;
2574+
_inputPtr = inPtr - 1;
2575+
_textBuffer.setCurrentLength(outPtr);
2576+
return len - outPtr;
2577+
}
2578+
_inputPtr = inPtr;
2579+
if (outPtr >= outBuf.length) {
2580+
outBuf = _textBuffer.finishCurrentSegment();
2581+
}
2582+
len -= size;
2583+
}
2584+
return len;
2585+
}
2586+
25412587
private final void _finishChunkedText() throws JacksonException
25422588
{
25432589
char[] outBuf = _textBuffer.emptyAndGetCurrentSegment();
@@ -2562,7 +2608,6 @@ private final void _finishChunkedText() throws JacksonException
25622608
}
25632609
break;
25642610
}
2565-
_chunkLeft = len;
25662611
int end = _inputPtr + len;
25672612
if (end <= _inputEnd) { // all within buffer
25682613
_chunkLeft = 0;
@@ -2571,19 +2616,22 @@ private final void _finishChunkedText() throws JacksonException
25712616
_chunkLeft = (end - _inputEnd);
25722617
_chunkEnd = _inputEnd;
25732618
}
2574-
}
2575-
// besides of which just need to ensure there's content
2576-
if (_inputPtr >= _inputEnd) { // end of buffer, but not necessarily chunk
2577-
loadMoreGuaranteed();
2578-
int end = _inputPtr + _chunkLeft;
2579-
if (end <= _inputEnd) { // all within buffer
2580-
_chunkLeft = 0;
2581-
_chunkEnd = end;
2582-
} else { // stretches beyond
2583-
_chunkLeft = (end - _inputEnd);
2584-
_chunkEnd = _inputEnd;
2619+
// start of a new chunk
2620+
// First a tight loop for ASCII.
2621+
_textBuffer.setCurrentLength(outPtr);
2622+
if (_finishChunkedTextAscii()) {
2623+
// chunk fully consumed, let's get the next one
2624+
outBuf = _textBuffer.getBufferWithoutReset();
2625+
outPtr = _textBuffer.getCurrentSegmentSize();
2626+
outEnd = outBuf.length;
2627+
continue;
25852628
}
2629+
outBuf = _textBuffer.getBufferWithoutReset();
2630+
outPtr = _textBuffer.getCurrentSegmentSize();
2631+
outEnd = outBuf.length;
25862632
}
2633+
// besides of which just need to ensure there's content
2634+
_loadMoreForChunkIfNeeded();
25872635
}
25882636
int c = input[_inputPtr++] & 0xFF;
25892637
int code = codes[c];
@@ -2593,9 +2641,9 @@ private final void _finishChunkedText() throws JacksonException
25932641
}
25942642

25952643
switch (code) {
2596-
case 0:
2597-
break;
2598-
case 1: // 2-byte UTF
2644+
case 0:
2645+
break;
2646+
case 1: // 2-byte UTF
25992647
{
26002648
int d = _nextChunkedByte();
26012649
if ((d & 0xC0) != 0x080) {
@@ -2604,24 +2652,24 @@ private final void _finishChunkedText() throws JacksonException
26042652
c = ((c & 0x1F) << 6) | (d & 0x3F);
26052653
}
26062654
break;
2607-
case 2: // 3-byte UTF
2608-
c = _decodeChunkedUTF8_3(c);
2609-
break;
2610-
case 3: // 4-byte UTF
2611-
c = _decodeChunkedUTF8_4(c);
2612-
// Let's add first part right away:
2613-
if (outPtr >= outBuf.length) {
2614-
outBuf = _textBuffer.finishCurrentSegment();
2615-
outPtr = 0;
2616-
outEnd = outBuf.length;
2617-
}
2618-
outBuf[outPtr++] = (char) (0xD800 | (c >> 10));
2619-
c = 0xDC00 | (c & 0x3FF);
2620-
// And let the other char output down below
2621-
break;
2622-
default:
2623-
// Is this good enough error message?
2624-
_reportInvalidInitial(c);
2655+
case 2: // 3-byte UTF
2656+
c = _decodeChunkedUTF8_3(c);
2657+
break;
2658+
case 3: // 4-byte UTF
2659+
c = _decodeChunkedUTF8_4(c);
2660+
// Let's add first part right away:
2661+
if (outPtr >= outBuf.length) {
2662+
outBuf = _textBuffer.finishCurrentSegment();
2663+
outPtr = 0;
2664+
outEnd = outBuf.length;
2665+
}
2666+
outBuf[outPtr++] = (char) (0xD800 | (c >> 10));
2667+
c = 0xDC00 | (c & 0x3FF);
2668+
// And let the other char output down below
2669+
break;
2670+
default:
2671+
// Is this good enough error message?
2672+
_reportInvalidInitial(c);
26252673
}
26262674
// Need more room?
26272675
if (outPtr >= outEnd) {
@@ -2632,9 +2680,75 @@ private final void _finishChunkedText() throws JacksonException
26322680
// Ok, let's add char to output:
26332681
outBuf[outPtr++] = (char) c;
26342682
}
2683+
26352684
_textBuffer.setCurrentLength(outPtr);
26362685
}
26372686

2687+
/**
2688+
* Reads in a tight loop ASCII text until a non-ASCII char is found. If any, then it returns false to signal the
2689+
* caller that the chunk wasn't finished. The caller will keep adding to the _outBuf at the _outPtr position to
2690+
* finish the current text buffer segment
2691+
*/
2692+
private final boolean _finishChunkedTextAscii() throws JacksonException
2693+
{
2694+
final byte[] input = _inputBuffer;
2695+
int outPtr = _textBuffer.getCurrentSegmentSize();
2696+
char[] outBuf = _textBuffer.getBufferWithoutReset();
2697+
int outEnd = outBuf.length;
2698+
while (true) {
2699+
// besides of which just need to ensure there's content
2700+
_loadMoreForChunkIfNeeded();
2701+
2702+
// Find the size of the loop
2703+
int inSize = _chunkEnd - _inputPtr;
2704+
int outSize = outEnd - outPtr;
2705+
int inputPtr = _inputPtr;
2706+
int inputPtrEnd = _inputPtr + Math.min(inSize, outSize);
2707+
int i = 0;
2708+
// loop with copying what we can.
2709+
while (inputPtr < inputPtrEnd && i >= 0) {
2710+
i = input[inputPtr++];
2711+
char val = (char) i;
2712+
outBuf[outPtr++] = val;
2713+
}
2714+
_inputPtr = inputPtr;
2715+
2716+
if (i < 0) {
2717+
// Found a non-ascii char, correct pointers and return to the caller.
2718+
_inputPtr -= 1;
2719+
_textBuffer.setCurrentLength(outPtr - 1);
2720+
// return false to signal this to the calling code to allow the multi-byte code-path to kick.
2721+
return false;
2722+
}
2723+
// Need more room?
2724+
if (outPtr >= outEnd) {
2725+
outBuf = _textBuffer.finishCurrentSegment();
2726+
outPtr = 0;
2727+
outEnd = outBuf.length;
2728+
}
2729+
if (_inputPtr < _chunkEnd || _chunkLeft > 0) {
2730+
continue;
2731+
}
2732+
_textBuffer.setCurrentLength(outPtr);
2733+
return true;
2734+
}
2735+
}
2736+
2737+
private final void _loadMoreForChunkIfNeeded() throws JacksonException
2738+
{
2739+
if (_inputPtr >= _inputEnd) { // end of buffer, but not necessarily chunk
2740+
loadMoreGuaranteed();
2741+
int end = _inputPtr + _chunkLeft;
2742+
if (end <= _inputEnd) { // all within buffer
2743+
_chunkLeft = 0;
2744+
_chunkEnd = end;
2745+
} else { // stretches beyond
2746+
_chunkLeft = (end - _inputEnd);
2747+
_chunkEnd = _inputEnd;
2748+
}
2749+
}
2750+
}
2751+
26382752
private final int _nextByte() throws JacksonException {
26392753
int inPtr = _inputPtr;
26402754
if (inPtr < _inputEnd) {
@@ -3758,6 +3872,10 @@ protected final boolean _tryToLoadToHaveAtLeast(int minAvailable) throws Jackson
37583872
if (_inputStream == null) {
37593873
return false;
37603874
}
3875+
// The code below assumes this is true, so we check it here.
3876+
if (_inputBuffer.length < minAvailable) {
3877+
return false;
3878+
}
37613879
// Need to move remaining data in front?
37623880
int amount = _inputEnd - _inputPtr;
37633881
if (amount > 0 && _inputPtr > 0) {

cbor/src/test/java/tools/jackson/dataformat/cbor/CBORTestBase.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,10 @@ protected static String generateUnicodeString(int length) {
280280
return generateUnicodeString(length, new Random(length));
281281
}
282282

283+
protected static String generateUnicodeStringWithAsciiPrefix(int asciiPrefixLen, int length) {
284+
return generateUnicodeStringWithAsciiPrefix(asciiPrefixLen, length, new Random(length));
285+
}
286+
283287
protected static String generateUnicodeString(int length, Random rnd)
284288
{
285289
StringBuilder sw = new StringBuilder(length+10);
@@ -305,6 +309,31 @@ protected static String generateUnicodeString(int length, Random rnd)
305309
return sw.toString();
306310
}
307311

312+
protected static String generateUnicodeStringWithAsciiPrefix(int asciiLength, int length, Random rnd)
313+
{
314+
StringBuilder sw = new StringBuilder(length+10);
315+
// add a prefix of ascii chars
316+
int num = asciiLength;
317+
while (--num >= 0) {
318+
sw.append((char) ('A' + (num % 32)));
319+
}
320+
do {
321+
// Then a unicode char of 2, 3 or 4 bytes long
322+
switch (rnd.nextInt() % 3) {
323+
case 0:
324+
sw.append((char) (256 + rnd.nextInt() & 511));
325+
break;
326+
case 1:
327+
sw.append((char) (2048 + rnd.nextInt() & 4095));
328+
break;
329+
default:
330+
sw.append((char) (65536 + rnd.nextInt() & 0x3FFF));
331+
break;
332+
}
333+
} while (sw.length() < length);
334+
return sw.toString();
335+
}
336+
308337
protected static String generateLongAsciiString(int length) {
309338
return generateLongAsciiString(length, new Random(length));
310339
}

0 commit comments

Comments
 (0)