Skip to content

Commit 8b0b9a6

Browse files
committed
Add ByteOrderMark.matches(int[])
- We now compare int arrays directly without calling ByteOrderMark.get(int) in a loop - Internal refactoring
1 parent 73a38a1 commit 8b0b9a6

File tree

4 files changed

+84
-36
lines changed

4 files changed

+84
-36
lines changed

src/changes/changes.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ The <action> type attribute can be add,update,fix,remove.
6464
<action dev="ggregory" type="add" due-to="Gary Gregory">Add ProxyOutputStream.setReference(OutputStream).</action>
6565
<action dev="ggregory" type="add" due-to="Gary Gregory">Add RandomAccessFileInputStream.copy(long, long, OutputStream).</action>
6666
<action dev="ggregory" type="add" due-to="Gary Gregory">Add ProxyOutputStream.Builder.</action>
67+
<action dev="ggregory" type="add" due-to="Gary Gregory">Add ByteOrderMark.matches(int[]).</action>
6768
<!-- UPDATE -->
6869
<action dev="ggregory" type="update" due-to="Dependabot, Gary Gregory">Bump commons.bytebuddy.version from 1.15.10 to 1.17.0 #710, #715, #720.</action>
6970
<action dev="ggregory" type="update" due-to="Gary Gregory">Bump commons-codec:commons-codec from 1.17.1 to 1.18.0. #717.</action>

src/main/java/org/apache/commons/io/ByteOrderMark.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,10 @@ public byte[] getBytes() {
193193
return copy;
194194
}
195195

196+
/**
 * Gets the array held in the {@code bytes} field directly, without making a copy.
 * <p>
 * NOTE(review): because the backing array itself is handed out, callers are presumably expected to treat it as
 * read-only; confirm before widening the visibility of this method.
 * </p>
 *
 * @return the backing array itself, not a copy.
 */
int[] getRawBytes() {
197+
return bytes;
198+
}
199+
196200
/**
197201
* Gets the name of the {@link java.nio.charset.Charset} the BOM represents.
198202
*
@@ -226,6 +230,33 @@ public int length() {
226230
return bytes.length;
227231
}
228232

233+
/**
234+
* Tests whether the given array starts with the bytes for this BOM.
235+
*
236+
* @param test the array to test.
237+
* @return whether the given array starts with the bytes for this BOM.
238+
* @since 2.19.0
239+
*/
240+
public boolean matches(final int[] test) {
241+
// Our test are never null.
242+
if (bytes == test) {
243+
return true;
244+
}
245+
if (test == null) {
246+
return false;
247+
}
248+
final int length = bytes.length;
249+
if (test.length < length) {
250+
return false;
251+
}
252+
for (int i = 0; i < length; i++) {
253+
if (bytes[i] != test[i]) {
254+
return false;
255+
}
256+
}
257+
return true;
258+
}
259+
229260
/**
230261
* Converts this instance to a String representation of the BOM.
231262
*

src/main/java/org/apache/commons/io/input/BOMInputStream.java

Lines changed: 32 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -311,9 +311,9 @@ public BOMInputStream(final InputStream delegate, final ByteOrderMark... boms) {
311311
}
312312

313313
/**
314-
* Find a BOM with the configured bytes in {@code bomList}.
314+
* Finds a BOM with the configured bytes in {@code bomList}.
315315
*
316-
* @return The matched BOM or null if none matched
316+
* @return The matched BOM or null if none matched.
317317
*/
318318
private ByteOrderMark find() {
319319
return bomList.stream().filter(this::matches).findFirst().orElse(null);
@@ -322,34 +322,13 @@ private ByteOrderMark find() {
322322
/**
323323
* Gets the BOM (Byte Order Mark).
324324
*
325-
* @return The BOM or null if none
325+
* @return The BOM or null if none matched.
326326
* @throws IOException
327-
* if an error reading the first bytes of the stream occurs
327+
* if an error reading the first bytes of the stream occurs.
328328
*/
329329
public ByteOrderMark getBOM() throws IOException {
330330
if (firstBytes == null) {
331-
fbLength = 0;
332-
// BOMs are sorted from longest to shortest
333-
final int maxBomSize = bomList.get(0).length();
334-
firstBytes = new int[maxBomSize];
335-
// Read first maxBomSize bytes
336-
for (int i = 0; i < firstBytes.length; i++) {
337-
firstBytes[i] = in.read();
338-
afterRead(firstBytes[i]);
339-
fbLength++;
340-
if (firstBytes[i] < 0) {
341-
break;
342-
}
343-
}
344-
// match BOM in firstBytes
345-
byteOrderMark = find();
346-
if (byteOrderMark != null && !include) {
347-
if (byteOrderMark.length() < firstBytes.length) {
348-
fbIndex = byteOrderMark.length();
349-
} else {
350-
fbLength = 0;
351-
}
352-
}
331+
byteOrderMark = readBom();
353332
}
354333
return byteOrderMark;
355334
}
@@ -416,16 +395,7 @@ public synchronized void mark(final int readLimit) {
416395
* @return true if the bytes match the bom, otherwise false
417396
*/
418397
private boolean matches(final ByteOrderMark bom) {
419-
// if (bom.length() != fbLength) {
420-
// return false;
421-
// }
422-
// firstBytes may be bigger than the BOM bytes
423-
for (int i = 0; i < bom.length(); i++) {
424-
if (bom.get(i) != firstBytes[i]) {
425-
return false;
426-
}
427-
}
428-
return true;
398+
return bom.matches(firstBytes);
429399
}
430400

431401
/**
@@ -486,6 +456,32 @@ public int read(final byte[] buf, int off, int len) throws IOException {
486456
return secondCount < 0 ? firstCount > 0 ? firstCount : EOF : firstCount + secondCount;
487457
}
488458

459+
private ByteOrderMark readBom() throws IOException {
460+
fbLength = 0;
461+
// BOMs are sorted from longest to shortest
462+
final int maxBomSize = bomList.get(0).length();
463+
firstBytes = new int[maxBomSize];
464+
// Read first maxBomSize bytes
465+
for (int i = 0; i < firstBytes.length; i++) {
466+
firstBytes[i] = in.read();
467+
afterRead(firstBytes[i]);
468+
fbLength++;
469+
if (firstBytes[i] < 0) {
470+
break;
471+
}
472+
}
473+
// match BOM in firstBytes
474+
final ByteOrderMark bom = find();
475+
if (bom != null && !include) {
476+
if (bom.length() < firstBytes.length) {
477+
fbIndex = bom.length();
478+
} else {
479+
fbLength = 0;
480+
}
481+
}
482+
return bom;
483+
}
484+
489485
/**
490486
* This method reads and either preserves or skips the first bytes in the stream. It behaves like the single-byte
491487
* {@code read()} method, either returning a valid byte or -1 to indicate that the initial bytes have been

src/test/java/org/apache/commons/io/ByteOrderMarkTest.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@
1818

1919
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
2020
import static org.junit.jupiter.api.Assertions.assertEquals;
21+
import static org.junit.jupiter.api.Assertions.assertFalse;
2122
import static org.junit.jupiter.api.Assertions.assertNotEquals;
2223
import static org.junit.jupiter.api.Assertions.assertNotNull;
2324
import static org.junit.jupiter.api.Assertions.assertThrows;
25+
import static org.junit.jupiter.api.Assertions.assertTrue;
2426

2527
import java.nio.charset.Charset;
2628

@@ -126,6 +128,24 @@ public void testLength() {
126128
assertEquals(3, TEST_BOM_3.length(), "test3 length");
127129
}
128130

131+
@Test
132+
public void testMatches() {
133+
assertTrue(ByteOrderMark.UTF_16BE.matches(ByteOrderMark.UTF_16BE.getRawBytes()));
134+
assertTrue(ByteOrderMark.UTF_16LE.matches(ByteOrderMark.UTF_16LE.getRawBytes()));
135+
assertTrue(ByteOrderMark.UTF_32BE.matches(ByteOrderMark.UTF_32BE.getRawBytes()));
136+
assertTrue(ByteOrderMark.UTF_16BE.matches(ByteOrderMark.UTF_16BE.getRawBytes()));
137+
assertTrue(ByteOrderMark.UTF_8.matches(ByteOrderMark.UTF_8.getRawBytes()));
138+
139+
assertTrue(TEST_BOM_1.matches(TEST_BOM_1.getRawBytes()));
140+
assertTrue(TEST_BOM_2.matches(TEST_BOM_2.getRawBytes()));
141+
assertTrue(TEST_BOM_3.matches(TEST_BOM_3.getRawBytes()));
142+
143+
assertFalse(TEST_BOM_1.matches(new ByteOrderMark("1a", 2).getRawBytes()));
144+
assertTrue(TEST_BOM_1.matches(new ByteOrderMark("1b", 1, 2).getRawBytes()));
145+
assertFalse(TEST_BOM_2.matches(new ByteOrderMark("2", 1, 1).getRawBytes()));
146+
assertFalse(TEST_BOM_3.matches(new ByteOrderMark("3", 1, 2, 4).getRawBytes()));
147+
}
148+
129149
/** Tests {@link ByteOrderMark#toString()} */
130150
@Test
131151
public void testToString() {

0 commit comments

Comments
 (0)