Skip to content

Commit 6881003

Browse files
committed
Add 'encodeCharsetToBytes()' encoders
fixes #7
1 parent 7995d0f commit 6881003

File tree

3 files changed

+48
-14
lines changed

3 files changed

+48
-14
lines changed

CHANGELOG

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Releases
22

3+
## v0.6.0
4+
5+
* add `encodeCharsetToBytes()` feature #7
6+
37
## v0.5.0
48

59
* better resource handling for compression

src/main/java/at/favre/lib/bytes/Bytes.java

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,7 @@
2121

2222
package at.favre.lib.bytes;
2323

24-
import java.io.ByteArrayInputStream;
25-
import java.io.DataInput;
26-
import java.io.File;
27-
import java.io.InputStream;
28-
import java.io.Serializable;
24+
import java.io.*;
2925
import java.math.BigInteger;
3026
import java.nio.ByteBuffer;
3127
import java.nio.ByteOrder;
@@ -35,13 +31,7 @@
3531
import java.nio.charset.StandardCharsets;
3632
import java.security.SecureRandom;
3733
import java.text.Normalizer;
38-
import java.util.Arrays;
39-
import java.util.BitSet;
40-
import java.util.Collection;
41-
import java.util.Iterator;
42-
import java.util.List;
43-
import java.util.Objects;
44-
import java.util.Random;
34+
import java.util.*;
4535

4636
/**
4737
* Bytes is a wrapper class for a byte-array that allows a lot of convenience operations on it:
@@ -1403,6 +1393,30 @@ public String encodeCharset(Charset charset) {
14031393
return new String(internalArray(), charset);
14041394
}
14051395

1396+
/**
1397+
* UTF-8 representation of this byte array, returned as a byte array
1398+
* <p>
1399+
* Similar to <code>encodeUtf8().getBytes(StandardCharsets.UTF_8)</code>.
1400+
*
1401+
* @return utf-8 encoded byte array
1402+
* @see <a href="https://en.wikipedia.org/wiki/UTF-8">UTF-8</a>
1403+
*/
1404+
public byte[] encodeUtf8ToBytes() {
1405+
return encodeCharsetToBytes(StandardCharsets.UTF_8);
1406+
}
1407+
1408+
/**
1409+
* Byte array representation with given charset encoding.
1410+
* <p>
1411+
* Similar to <code>encodeCharset(charset).getBytes(charset)</code>.
1412+
*
1413+
* @param charset the charset in which the returned byte array will be encoded
1414+
* @return encoded byte array
1415+
*/
1416+
public byte[] encodeCharsetToBytes(Charset charset) {
1417+
return encodeCharset(charset).getBytes(charset);
1418+
}
1419+
14061420
/**
14071421
* Encode the internal byte-array with given encoder.
14081422
*
@@ -1426,8 +1440,8 @@ public List<Byte> toList() {
14261440
}
14271441

14281442
/**
1429-
* @deprecated renamed API, use {@link #toBoxedArray()} instead - will be removed in v1.0+
14301443
* @return see {@link #toBoxedArray()}
1444+
* @deprecated renamed API, use {@link #toBoxedArray()} instead - will be removed in v1.0+
14311445
*/
14321446
@Deprecated
14331447
public Byte[] toObjectArray() {
@@ -1636,7 +1650,7 @@ public boolean equals(byte[] anotherArray) {
16361650
* Compares the inner array with given array. The comparison is done in constant time, therefore
16371651
* will not break on the first mismatch. This method is useful to prevent some side-channel attacks,
16381652
* but is slower on average.
1639-
*
1653+
* <p>
16401654
* This implementation uses the algorithm suggested in https://codahale.com/a-lesson-in-timing-attacks/
16411655
*
16421656
* @param anotherArray to compare with

src/test/java/at/favre/lib/bytes/BytesConstructorTests.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,22 @@ public void fromString() {
241241
checkString("7asdh#ö01^^`´dµ@€", StandardCharsets.ISO_8859_1);
242242
}
243243

244+
@Test
245+
public void encodeCharsetToBytes() {
246+
byte[][] testVectors = new byte[][]{example_bytes_seven, example_bytes_one, example_bytes_two, example2_bytes_seven, example_bytes_twentyfour};
247+
248+
for (byte[] testVector : testVectors) {
249+
System.out.println(new String(testVector, StandardCharsets.ISO_8859_1));
250+
System.out.println(new String(Bytes.wrap(testVector).encodeCharsetToBytes(StandardCharsets.ISO_8859_1), StandardCharsets.ISO_8859_1));
251+
System.out.println(new String(testVector, StandardCharsets.UTF_8));
252+
System.out.println(new String(Bytes.wrap(testVector).encodeCharsetToBytes(StandardCharsets.UTF_8), StandardCharsets.UTF_8));
253+
254+
assertArrayEquals(new String(testVector, StandardCharsets.ISO_8859_1).getBytes(StandardCharsets.ISO_8859_1), Bytes.wrap(testVector).encodeCharsetToBytes(StandardCharsets.ISO_8859_1));
255+
assertArrayEquals(new String(testVector, StandardCharsets.UTF_8).getBytes(StandardCharsets.UTF_8), Bytes.wrap(testVector).encodeCharsetToBytes(StandardCharsets.UTF_8));
256+
assertArrayEquals(new String(testVector, StandardCharsets.UTF_8).getBytes(StandardCharsets.UTF_8), Bytes.wrap(testVector).encodeUtf8ToBytes());
257+
}
258+
}
259+
244260
@Test
245261
public void fromCharArray() {
246262
checkCharArray("");

0 commit comments

Comments
 (0)