Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 21 additions & 13 deletions src/main/java/com/github/underscore/U.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
Expand Down Expand Up @@ -2783,20 +2783,26 @@ public static String xmlToJson(String xml, XmlToJsonMode mode) {
return xmlToJson(xml, Json.JsonStringBuilder.Step.TWO_SPACES, mode);
}

public static void fileXmlToJson(String xmlFileName, String jsonFileName, Json.JsonStringBuilder.Step identStep)
public static void fileXmlToJson(
String xmlFileName, String jsonFileName, Json.JsonStringBuilder.Step identStep)
throws IOException {
final byte[] bytes = Files.readAllBytes(Paths.get(xmlFileName));
String xmlText = new String(removeBom(bytes), detectEncoding(bytes));
Files.write(Paths.get(jsonFileName), formatString(xmlToJson(xmlText, identStep),
System.lineSeparator()).getBytes(StandardCharsets.UTF_8));
Files.write(
Paths.get(jsonFileName),
formatString(xmlToJson(xmlText, identStep), System.lineSeparator())
.getBytes(StandardCharsets.UTF_8));
}

public static void fileXmlToJson(String xmlFileName, String jsonFileName) throws IOException {
fileXmlToJson(xmlFileName, jsonFileName, Json.JsonStringBuilder.Step.TWO_SPACES);
}

public static void streamXmlToJson(InputStream xmlInputStream, OutputStream jsonOutputStream,
Json.JsonStringBuilder.Step indentStep) throws IOException {
public static void streamXmlToJson(
InputStream xmlInputStream,
OutputStream jsonOutputStream,
Json.JsonStringBuilder.Step indentStep)
throws IOException {
byte[] bytes = xmlInputStream.readAllBytes();
String encoding = detectEncoding(bytes);
String xmlText = new String(removeBom(bytes), encoding);
Expand All @@ -2805,7 +2811,8 @@ public static void streamXmlToJson(InputStream xmlInputStream, OutputStream json
jsonOutputStream.write(formattedJson.getBytes(StandardCharsets.UTF_8));
}

public static void streamXmlToJson(InputStream xmlInputStream, OutputStream jsonOutputStream) throws IOException {
public static void streamXmlToJson(InputStream xmlInputStream, OutputStream jsonOutputStream)
throws IOException {
streamXmlToJson(xmlInputStream, jsonOutputStream, Json.JsonStringBuilder.Step.TWO_SPACES);
}

Expand All @@ -2827,10 +2834,11 @@ public static String detectEncoding(byte[] buffer) {
return "UTF8";
}
String encoding = null;
int n = ((buffer[0] & 0xFF) << 24)
| ((buffer[1] & 0xFF) << 16)
| ((buffer[2] & 0xFF) << 8)
| (buffer[3] & 0xFF);
int n =
((buffer[0] & 0xFF) << 24)
| ((buffer[1] & 0xFF) << 16)
| ((buffer[2] & 0xFF) << 8)
| (buffer[3] & 0xFF);
switch (n) {
case 0x0000FEFF:
case 0x0000003C:
Expand All @@ -2845,11 +2853,11 @@ public static String detectEncoding(byte[] buffer) {
case 0x3C000000:
encoding = "UTF_32LE";
break;
// <?
// <?
case 0x3C003F00:
encoding = "UnicodeLittleUnmarked";
break;
// <?xm
// <?xm
case 0x3C3F786D:
encoding = "UTF8";
break;
Expand Down
3 changes: 1 addition & 2 deletions src/test/java/com/github/underscore/CollectionsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1677,8 +1677,7 @@ public Integer next() {
}

@Override
public void remove() {
}
public void remove() {}
};
assertEquals(6, Underscore.size(iterable));
assertEquals(5, Underscore.size(new Integer[] {5, 4, 3, 2, 1}));
Expand Down
135 changes: 77 additions & 58 deletions src/test/java/com/github/underscore/UnderscoreTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
Expand Down Expand Up @@ -877,7 +877,8 @@ void testRemoveUtf16LeBom() {
void testNotShortBytesNoBom() {
// Less than 2 bytes (not possible to have BOM)
byte[] input = new byte[] {42};
assertArrayEquals(input, U.removeBom(input), "Short arrays with no BOM should be unchanged");
assertArrayEquals(
input, U.removeBom(input), "Short arrays with no BOM should be unchanged");
}

@Test
Expand All @@ -891,26 +892,36 @@ void testNoBomPresent() {
void testAlmostBomButNotEnoughBytes() {
// only 2 bytes, not enough for UTF-8 BOM
byte[] input = new byte[] {-17, -69};
assertArrayEquals(input, U.removeBom(input), "Arrays with too few BOM bytes should be unchanged");
assertArrayEquals(
input, U.removeBom(input), "Arrays with too few BOM bytes should be unchanged");
}

@Test
void testPrefixSimilarButNotABom() {
byte[] input = new byte[] {-1, 0, 1};
assertArrayEquals(input, U.removeBom(input), "Array starting with -1,0 is not a BOM, should be unchanged");
assertArrayEquals(
input,
U.removeBom(input),
"Array starting with -1,0 is not a BOM, should be unchanged");
input = new byte[] {-2, 0, 1};
assertArrayEquals(input, U.removeBom(input), "Array starting with -2,0 is not a BOM, should be unchanged");
assertArrayEquals(
input,
U.removeBom(input),
"Array starting with -2,0 is not a BOM, should be unchanged");
// 3 bytes but third is not -65
input = new byte[] {-17, -69, 0};
assertArrayEquals(input, U.removeBom(input), "Array with -17,-69,<not -65> is not a BOM");
input = new byte[] { -17, -69 };
input = new byte[] {-17, -69};
assertArrayEquals(input, U.removeBom(input), "Should not remove BOM for length < 3");
input = new byte[] { 0, -69, -65, 33 };
assertArrayEquals(input, U.removeBom(input), "Should not remove BOM if first byte is not -17");
input = new byte[] { -17, 0, -65, 13 };
assertArrayEquals(input, U.removeBom(input), "Should not remove BOM if second byte is not -69");
input = new byte[] { -17, -69, 0, 14 };
assertArrayEquals(input, U.removeBom(input), "Should not remove BOM if third byte is not -65");
input = new byte[] {0, -69, -65, 33};
assertArrayEquals(
input, U.removeBom(input), "Should not remove BOM if first byte is not -17");
input = new byte[] {-17, 0, -65, 13};
assertArrayEquals(
input, U.removeBom(input), "Should not remove BOM if second byte is not -69");
input = new byte[] {-17, -69, 0, 14};
assertArrayEquals(
input, U.removeBom(input), "Should not remove BOM if third byte is not -65");
}

@Test
Expand All @@ -926,90 +937,97 @@ void testLengthLessThan4() {
@Test
void testCase_0x0000FEFF() {
byte[] buf = {(byte) 0x00, (byte) 0x00, (byte) 0xFE, (byte) 0xFF};
assertEquals("UTF_32BE", U.detectEncoding(buf),
"Should return UTF_32BE for BOM 0x0000FEFF");
assertEquals(
"UTF_32BE", U.detectEncoding(buf), "Should return UTF_32BE for BOM 0x0000FEFF");
}

@Test
void testCase_0x0000003C() {
byte[] buf = {(byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x3C};
assertEquals("UTF_32BE", U.detectEncoding(buf),
"Should return UTF_32BE for 0x0000003C");
assertEquals("UTF_32BE", U.detectEncoding(buf), "Should return UTF_32BE for 0x0000003C");
}

@Test
void testCase_0x003C003F() {
byte[] buf = {(byte) 0x00, (byte) 0x3C, (byte) 0x00, (byte) 0x3F};
assertEquals("UnicodeBigUnmarked", U.detectEncoding(buf),
assertEquals(
"UnicodeBigUnmarked",
U.detectEncoding(buf),
"Should return UnicodeBigUnmarked for 0x003C003F");
}

@Test
void testCase_0xFFFE0000() {
byte[] buf = {(byte) 0xFF, (byte) 0xFE, (byte) 0x00, (byte) 0x00};
assertEquals("UTF_32LE", U.detectEncoding(buf),
"Should return UTF_32LE for BOM 0xFFFE0000");
assertEquals(
"UTF_32LE", U.detectEncoding(buf), "Should return UTF_32LE for BOM 0xFFFE0000");
}

@Test
void testCase_0x3C000000() {
byte[] buf = {(byte) 0x3C, (byte) 0x00, (byte) 0x00, (byte) 0x00};
assertEquals("UTF_32LE", U.detectEncoding(buf),
"Should return UTF_32LE for 0x3C000000");
assertEquals("UTF_32LE", U.detectEncoding(buf), "Should return UTF_32LE for 0x3C000000");
}

@Test
void testCase_0x3C003F00() {
byte[] buf = {(byte) 0x3C, (byte) 0x00, (byte) 0x3F, (byte) 0x00};
assertEquals("UnicodeLittleUnmarked", U.detectEncoding(buf),
assertEquals(
"UnicodeLittleUnmarked",
U.detectEncoding(buf),
"Should return UnicodeLittleUnmarked for 0x3C003F00");
}

@Test
void testCase_0x3C3F786D() {
byte[] buf = {(byte) 0x3C, (byte) 0x3F, (byte) 0x78, (byte) 0x6D};
assertEquals("UTF8", U.detectEncoding(buf),
"Should return UTF8 for 0x3C3F786D");
assertEquals("UTF8", U.detectEncoding(buf), "Should return UTF8 for 0x3C3F786D");
}

@Test
void testEfBbBf_UTF8() {
// 0xEFBBBF??, so n >>> 8 == 0xEFBBBF
// Let's set: [0xEF, 0xBB, 0xBF, 0x42] (0x42 is arbitrary)
byte[] buf = {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF, (byte) 0x42};
assertEquals("UTF8", U.detectEncoding(buf),
"Should return UTF8 for buffer with UTF-8 BOM");
assertEquals("UTF8", U.detectEncoding(buf), "Should return UTF8 for buffer with UTF-8 BOM");
}

@Test
void test_nShift24_0x3C() {
// (n >>> 24) == 0x3C, but not matching any above case
byte[] buf = {(byte) 0x3C, 1, 2, 3};
assertEquals("UTF8", U.detectEncoding(buf),
assertEquals(
"UTF8",
U.detectEncoding(buf),
"Should return UTF8 when (n >>> 24) == 0x3C and no previous case matches");
}

@Test
void test_nShift16_0xFFFE() {
// (n >>> 16) == 0xFFFE (UnicodeLittleUnmarked branch)
byte[] buf = {(byte) 0xFF, (byte) 0xFE, (byte) 0x21, (byte) 0x22};
assertEquals("UnicodeLittleUnmarked", U.detectEncoding(buf),
assertEquals(
"UnicodeLittleUnmarked",
U.detectEncoding(buf),
"Should return UnicodeLittleUnmarked when (n >> 16) == 0xFFFE");
}

@Test
void test_nShift16_0xFEFF() {
// (n >>> 16) == 0xFEFF (UnicodeBigUnmarked branch)
byte[] buf = {(byte) 0xFE, (byte) 0xFF, (byte) 0x99, (byte) 0x88};
assertEquals("UnicodeBigUnmarked", U.detectEncoding(buf),
assertEquals(
"UnicodeBigUnmarked",
U.detectEncoding(buf),
"Should return UnicodeBigUnmarked when (n >> 16) == 0xFEFF");
}

@Test
void testDefaultCase() {
// Random data, not matching any case nor any shift checks. Should default to UTF8
byte[] buf = {(byte) 0x01, (byte) 0x23, (byte) 0x45, (byte) 0x67};
assertEquals("UTF8", U.detectEncoding(buf), "Should default to UTF8 for unknown byte patterns");
assertEquals(
"UTF8", U.detectEncoding(buf), "Should default to UTF8 for unknown byte patterns");
}

@Test
Expand All @@ -1019,31 +1037,25 @@ void testFormatString() {
assertEquals(
input1,
U.formatString(input1, "\n"),
"Should not modify string when line separator is already \\n"
);
"Should not modify string when line separator is already \\n");

// Test with different line separator
String input2 = "line1\nline2\nline3";
String expected2 = "line1\r\nline2\r\nline3";
assertEquals(
expected2,
U.formatString(input2, "\r\n"),
"Should replace \\n with specified line separator"
);
"Should replace \\n with specified line separator");

// Test with empty string
assertTrue(
U.formatString("", "\n").isEmpty(),
"Should handle empty string correctly"
);
assertTrue(U.formatString("", "\n").isEmpty(), "Should handle empty string correctly");

// Test with no line breaks
String noBreaks = "text without breaks";
assertEquals(
noBreaks,
U.formatString(noBreaks, "\r\n"),
"Should not modify string without line breaks"
);
"Should not modify string without line breaks");
}

@Test
Expand All @@ -1059,21 +1071,19 @@ void testFileXmlToJson(@TempDir Path tempDir) throws IOException {
// Test file conversion
assertDoesNotThrow(
() -> U.fileXmlToJson(xmlPath.toString(), jsonPath.toString()),
"File conversion should not throw exceptions"
);
"File conversion should not throw exceptions");

// Verify the JSON file
assertTrue(
Files.exists(jsonPath),
"JSON file should be created"
);
assertTrue(Files.exists(jsonPath), "JSON file should be created");

String jsonContent = Files.readString(jsonPath);
assertAll("JSON file content verification",
assertAll(
"JSON file content verification",
() -> assertNotNull(jsonContent, "JSON content should not be null"),
() -> assertTrue(jsonContent.contains("\"item\": \"value\""),
"JSON should contain converted XML content")
);
() ->
assertTrue(
jsonContent.contains("\"item\": \"value\""),
"JSON should contain converted XML content"));
}

@Test
Expand All @@ -1084,8 +1094,7 @@ void testFileXmlToJsonWithInvalidInput(@TempDir Path tempDir) {
assertThrows(
IOException.class,
() -> U.fileXmlToJson(nonExistentXml.toString(), outputJson.toString()),
"Should throw IOException when input file doesn't exist"
);
"Should throw IOException when input file doesn't exist");
}

@Test
Expand All @@ -1105,9 +1114,14 @@ void testStreamXmlToJson_validXml_writesJson() throws IOException {
void testStreamXmlToJson_emptyInput_producesEmptyOrError() {
InputStream xmlStream = new ByteArrayInputStream(new byte[0]);
ByteArrayOutputStream jsonStream = new ByteArrayOutputStream();
Exception exception = assertThrows(Exception.class, () -> {
U.streamXmlToJson(xmlStream, jsonStream, Json.JsonStringBuilder.Step.TWO_SPACES);
}, "Should throw exception for empty input.");
Exception exception =
assertThrows(
Exception.class,
() -> {
U.streamXmlToJson(
xmlStream, jsonStream, Json.JsonStringBuilder.Step.TWO_SPACES);
},
"Should throw exception for empty input.");
String msg = exception.getMessage();
assertNotNull(msg, "Exception message should not be null.");
}
Expand All @@ -1118,9 +1132,14 @@ void testStreamXmlToJson_invalidXml_throwsException() {
String invalidXml = "<root><name>Test</name>";
InputStream xmlStream = new ByteArrayInputStream(invalidXml.getBytes());
ByteArrayOutputStream jsonStream = new ByteArrayOutputStream();
Exception exception = assertThrows(Exception.class, () -> {
U.streamXmlToJson(xmlStream, jsonStream, Json.JsonStringBuilder.Step.TWO_SPACES);
}, "Should throw exception for invalid XML.");
Exception exception =
assertThrows(
Exception.class,
() -> {
U.streamXmlToJson(
xmlStream, jsonStream, Json.JsonStringBuilder.Step.TWO_SPACES);
},
"Should throw exception for invalid XML.");
String msg = exception.getMessage();
assertNotNull(msg, "Exception message for invalid XML should not be null.");
}
Expand Down
Loading