-
-
Notifications
You must be signed in to change notification settings - Fork 819
Closed
Milestone
Description
Version: Jackson 2.9.8
parser.getCurrentLocation().getByteOffset()
returns the wrong byte offset into the underlying byte array when the payload starts with a BOM.
The JSON parser handles such payloads correctly, but the JsonLocation
it returns ignores the offset introduced by the BOM.
Full standalone repro:
package test;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
/**
 * Standalone reproduction: slices the raw bytes of the {@code "payload"} object out of a JSON
 * document using the byte offsets reported by {@code parser.getCurrentLocation()}, and prints
 * the slice for three inputs (plain, with leading whitespace, with a leading UTF-8 BOM).
 */
public class Test {
    /** The three bytes of the UTF-8 byte order mark that are prepended in the last scenario. */
    private static final byte[] UTF8_BOM = {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF};

    private static final JsonFactory JSON_FACTORY;

    static {
        JSON_FACTORY = new JsonFactory();
        JSON_FACTORY.setCharacterEscapes(null);
        JSON_FACTORY.disable(JsonFactory.Feature.INTERN_FIELD_NAMES);
    }

    /**
     * Copies the bytes of the object the parser is currently positioned on.
     *
     * @param bytes  the buffer the parser was created from
     * @param parser parser whose current token is {@code token}
     * @param token  the token just returned by the parser; must be {@code START_OBJECT}
     * @return the sub-array of {@code bytes} between the reported start and end offsets
     * @throws IOException if the parser fails while skipping the object
     * @throws IllegalStateException if {@code token} is not {@code START_OBJECT}
     */
    static byte[] extract(byte[] bytes, JsonParser parser, JsonToken token) throws IOException {
        if (token != JsonToken.START_OBJECT) {
            throw new IllegalStateException("Expected START_OBJECT but got " + token);
        }
        // The location is read after START_OBJECT was returned; the -1 moves the start
        // back by one byte so that the copied range begins at the opening brace.
        int startIndex = (int) parser.getCurrentLocation().getByteOffset() - 1;
        parser.skipChildren();
        int endIndex = (int) parser.getCurrentLocation().getByteOffset();
        return Arrays.copyOfRange(bytes, startIndex, endIndex);
    }

    /**
     * Parses {@code bytes}, expects the first field of the root object to be named
     * {@code payload} (case-insensitive) and returns the raw bytes of its object value.
     *
     * @param bytes the JSON document
     * @return the bytes selected by {@link #extract(byte[], JsonParser, JsonToken)}
     * @throws IOException if parsing fails
     * @throws IllegalStateException if the first field is missing or is not {@code payload}
     */
    static byte[] parseAndExtract(byte[] bytes) throws IOException {
        // try-with-resources: the original never closed the parser.
        try (JsonParser parser = JSON_FACTORY.createParser(bytes)) {
            System.out.println("parser type: " + parser.getClass().getCanonicalName());
            parser.nextToken(); // skip start object
            if (parser.nextToken() == JsonToken.FIELD_NAME
                    && "payload".equalsIgnoreCase(parser.getCurrentName())) {
                return extract(bytes, parser, parser.nextToken());
            }
            throw new IllegalStateException("First field of the root object is not \"payload\"");
        }
    }

    /**
     * Runs the three scenarios and prints the extracted slice for each one.
     *
     * @param args unused
     * @throws Exception if parsing any of the inputs fails
     */
    public static void main(String[] args) throws Exception {
        String json = "{\"payload\":{\"name\":\"foo\"}}";
        // Encode and decode explicitly as UTF-8 so the byte layout does not depend on the
        // platform default charset.
        byte[] bytes = json.getBytes(StandardCharsets.UTF_8);

        System.out.println("UTF-8 no BOM:");
        byte[] result = parseAndExtract(bytes);
        System.out.println(new String(result, StandardCharsets.UTF_8));
        System.out.println();

        System.out.println("UTF-8 no BOM, with leading characters:");
        result = parseAndExtract(("\r\n\t\n\t\t" + json).getBytes(StandardCharsets.UTF_8));
        System.out.println(new String(result, StandardCharsets.UTF_8));
        System.out.println();

        // Prefix the document with the BOM.
        byte[] newBytes = new byte[bytes.length + UTF8_BOM.length];
        System.arraycopy(UTF8_BOM, 0, newBytes, 0, UTF8_BOM.length);
        System.arraycopy(bytes, 0, newBytes, UTF8_BOM.length, bytes.length);

        System.out.println("UTF-8 BOM:");
        result = parseAndExtract(newBytes);
        System.out.println(new String(result, StandardCharsets.UTF_8));
        System.out.println();
    }
}
Output:
UTF-8 no BOM:
parser type: com.fasterxml.jackson.core.json.UTF8StreamJsonParser
{"name":"foo"}
UTF-8 no BOM, with leading characters:
parser type: com.fasterxml.jackson.core.json.UTF8StreamJsonParser
{"name":"foo"}
UTF-8 BOM:
parser type: com.fasterxml.jackson.core.json.UTF8StreamJsonParser
d":{"name":"fo
You can see that the result for the BOM payload is shifted to the left by exactly 3 bytes (the length of the UTF-8 BOM), while the other payloads, with or without leading whitespace characters, are handled as expected.
erks
Metadata
Metadata
Assignees
Labels
No labels