Skip to content

Commit 3127402

Browse files
committed
Support for arbitrary radix encoding/parsing and fix test for number encoding
fixes #6, #20
1 parent 5af372c commit 3127402

File tree

6 files changed

+143
-31
lines changed

6 files changed

+143
-31
lines changed

CHANGELOG

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
## v0.8.0
44

5+
* add radix encoding/parsing and fix radix tests #6, #20
6+
57
## v0.7.1
68

79
* sign AFTER ProGuard so optimized version has correct jar signature

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ Bytes.wrap(array).transform(shuffle());
285285
### Parser and Encoder for Binary-Text-Encodings
286286

287287
This library can parse and encode a variety of encodings: binary, decimal, [octal](https://en.wikipedia.org/wiki/Octal),
288-
[hex](https://en.wikipedia.org/wiki/Hexadecimal), [base36](https://en.wikipedia.org/wiki/Base36) and
288+
[hex](https://en.wikipedia.org/wiki/Hexadecimal) and
289289
[base64](https://en.wikipedia.org/wiki/Base64). Additionally custom parsers are supported by providing your own
290290
implementation:
291291

@@ -318,7 +318,7 @@ Additionally the following encodings are supported:
318318
Bytes.from(array).encodeBinary(); //1110110110101111
319319
Bytes.from(array).encodeDec(); //20992966904426477
320320
Bytes.from(array).encodeOctal(); //1124517677707527755
321-
Bytes.from(array).encodeBase36(); //5qpdvuwjvu5
321+
Bytes.from(array).encodeRadix(36); //5qpdvuwjvu5
322322
```
323323

324324
### Handling Strings

src/main/java/at/favre/lib/bytes/BinaryToTextEncoding.java

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -159,10 +159,13 @@ public byte[] decode(String encoded) {
159159
/**
160160
* Simple radix encoder which internally uses {@link BigInteger#toString(int)}
161161
*/
162-
class BaseRadix implements EncoderDecoder {
162+
class BaseRadixNumber implements EncoderDecoder {
163163
private final int radix;
164164

165-
BaseRadix(int radix) {
165+
BaseRadixNumber(int radix) {
166+
if (radix < Character.MIN_RADIX || radix > Character.MAX_RADIX) {
167+
throw new IllegalArgumentException("supported radix is between " + Character.MIN_RADIX + " and " + Character.MAX_RADIX);
168+
}
166169
this.radix = radix;
167170
}
168171

@@ -181,9 +184,5 @@ public byte[] decode(String encoded) {
181184
}
182185
return array;
183186
}
184-
185-
// private int maxLength(byte[] data, int radix) {
186-
// return BigInteger.valueOf(2).pow(BigInteger.valueOf(data.length).multiply(BigInteger.valueOf(8)).intValue()).toString(radix).length();
187-
// }
188187
}
189188
}

src/main/java/at/favre/lib/bytes/Bytes.java

Lines changed: 52 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,7 @@ public static Bytes from(UUID uuid) {
518518
* @return decoded instance
519519
*/
520520
public static Bytes parseBinary(String binaryString) {
521-
return parse(binaryString, new BinaryToTextEncoding.BaseRadix(2));
521+
return parseRadix(binaryString, 2);
522522
}
523523

524524
/**
@@ -528,7 +528,7 @@ public static Bytes parseBinary(String binaryString) {
528528
* @return decoded instance
529529
*/
530530
public static Bytes parseOctal(String octalString) {
531-
return parse(octalString, new BinaryToTextEncoding.BaseRadix(8));
531+
return parseRadix(octalString, 8);
532532
}
533533

534534
/**
@@ -538,7 +538,22 @@ public static Bytes parseOctal(String octalString) {
538538
* @return decoded instance
539539
*/
540540
public static Bytes parseDec(String decString) {
541-
return parse(decString, new BinaryToTextEncoding.BaseRadix(10));
541+
return parseRadix(decString, 10);
542+
}
543+
544+
/**
545+
* Parses a string in the given radix representation (e.g. radix 16 would be hex).
546+
* See also {@link BigInteger#toString(int)}.
547+
* <p>
548+
* This is usually a number encoding, not a data encoding (i.e. leading zeros are not preserved), but this implementation
549+
* tries to preserve the leading zeros, to keep the in/output byte length size the same, but use at your own risk!
550+
*
551+
* @param radixNumberString the encoded string
552+
* @param radix radix of the String representation (supported are 2-36)
553+
* @return decoded instance
554+
*/
555+
public static Bytes parseRadix(String radixNumberString, int radix) {
556+
return parse(radixNumberString, new BinaryToTextEncoding.BaseRadixNumber(radix));
542557
}
543558

544559
/**
@@ -554,12 +569,17 @@ public static Bytes parseHex(String hexString) {
554569

555570
/**
556571
* Parsing of base36 encoded byte arrays.
572+
* <p>
573+
* This is usually a number encoding, not a data encoding (i.e. leading zeros are not preserved), but this implementation
574+
* tries to preserve the leading zeros, to keep the in/output byte length size the same.
557575
*
558576
* @param base36String the encoded string
559577
* @return decoded instance
578+
* @deprecated use {@link #parseRadix(String, int)} with 36 instead; will be removed in v1.0+
560579
*/
580+
@Deprecated
561581
public static Bytes parseBase36(String base36String) {
562-
return parse(base36String, new BinaryToTextEncoding.BaseRadix(36));
582+
return parse(base36String, new BinaryToTextEncoding.BaseRadixNumber(36));
563583
}
564584

565585
/**
@@ -1447,7 +1467,7 @@ byte[] internalArray() {
14471467
* @see <a href="https://en.wikipedia.org/wiki/Binary_number">Binary number</a>
14481468
*/
14491469
public String encodeBinary() {
1450-
return encode(new BinaryToTextEncoding.BaseRadix(2));
1470+
return encodeRadix(2);
14511471
}
14521472

14531473
/**
@@ -1459,7 +1479,7 @@ public String encodeBinary() {
14591479
* @see <a href="https://en.wikipedia.org/wiki/Octal">Octal</a>
14601480
*/
14611481
public String encodeOctal() {
1462-
return encode(new BinaryToTextEncoding.BaseRadix(8));
1482+
return encodeRadix(8);
14631483
}
14641484

14651485
/**
@@ -1471,7 +1491,27 @@ public String encodeOctal() {
14711491
* @see <a href="https://en.wikipedia.org/wiki/Decimal">Decimal</a>
14721492
*/
14731493
public String encodeDec() {
1474-
return encode(new BinaryToTextEncoding.BaseRadix(10));
1494+
return encodeRadix(10);
1495+
}
1496+
1497+
/**
1498+
* Encodes the internal array in given radix representation (e.g. 2 = binary, 10 = decimal, 16 = hex).
1499+
* <p>
1500+
* This is usually a number encoding, not a data encoding (i.e. leading zeros are not preserved), but this implementation
1501+
* tries to preserve the leading zeros, to keep the in/output byte length size the same. To preserve the length, padding
1502+
* would be required, but is not supported in this implementation.
1503+
* <p>
1504+
* But still full disclaimer:
1505+
*
1506+
* <strong>This is NOT recommended for data encoding, only for number encoding</strong>
1507+
* <p>
1508+
* See <a href="https://en.wikipedia.org/wiki/Radix_economy">Radix Economy</a> and {@link BigInteger#toString(int)}.
1509+
*
1510+
* @param radix of the String representation (supported are 2-36)
1511+
* @return string in given radix representation
1512+
*/
1513+
public String encodeRadix(int radix) {
1514+
return encode(new BinaryToTextEncoding.BaseRadixNumber(radix));
14751515
}
14761516

14771517
/**
@@ -1499,16 +1539,20 @@ public String encodeHex(boolean upperCase) {
14991539
}
15001540

15011541
/**
1542+
* DO NOT USE AS DATA ENCODING, ONLY FOR NUMBERS!
1543+
* <p>
15021544
* Base36 (aka Hexatrigesimal) representation. The choice of 36 is convenient in that the digits can be
15031545
* represented using the Arabic numerals 0–9 and the Latin letters A–Z. This encoding has a space efficiency of 64.6%.
15041546
* <p>
15051547
* Example: <code>5qpdvuwjvu5</code>
15061548
*
15071549
* @return base36 string
15081550
* @see <a href="https://en.wikipedia.org/wiki/Base36">Base36</a>
1551+
* @deprecated use {@link #encodeRadix(int)} with 36 instead; will be removed in v1.0+
15091552
*/
1553+
@Deprecated
15101554
public String encodeBase36() {
1511-
return encode(new BinaryToTextEncoding.BaseRadix(36));
1555+
return encodeRadix(36);
15121556
}
15131557

15141558
/**

src/test/java/at/favre/lib/bytes/BinaryToTextEncodingTest.java

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121

2222
package at.favre.lib.bytes;
2323

24-
import org.junit.Ignore;
2524
import org.junit.Test;
2625

2726
import java.nio.ByteOrder;
@@ -43,40 +42,50 @@ public void decodeHexShouldFail() {
4342

4443
@Test
4544
public void encodeBaseRadix() {
46-
assertEquals("100211", new BinaryToTextEncoding.BaseRadix(16).encode(new byte[]{16, 2, 17}, ByteOrder.BIG_ENDIAN));
47-
assertEquals("110210", new BinaryToTextEncoding.BaseRadix(16).encode(new byte[]{16, 2, 17}, ByteOrder.LITTLE_ENDIAN));
48-
assertNotEquals(new BinaryToTextEncoding.BaseRadix(2).encode(new byte[]{1, 2, 3}, ByteOrder.LITTLE_ENDIAN), new BinaryToTextEncoding.BaseRadix(2).encode(new byte[]{1, 2, 3}, ByteOrder.BIG_ENDIAN));
45+
assertEquals("100211", new BinaryToTextEncoding.BaseRadixNumber(16).encode(new byte[]{16, 2, 17}, ByteOrder.BIG_ENDIAN));
46+
assertEquals("110210", new BinaryToTextEncoding.BaseRadixNumber(16).encode(new byte[]{16, 2, 17}, ByteOrder.LITTLE_ENDIAN));
47+
assertNotEquals(new BinaryToTextEncoding.BaseRadixNumber(2).encode(new byte[]{1, 2, 3}, ByteOrder.LITTLE_ENDIAN), new BinaryToTextEncoding.BaseRadixNumber(2).encode(new byte[]{1, 2, 3}, ByteOrder.BIG_ENDIAN));
4948
}
5049

5150
@Test
5251
public void encodeDecodeRadix() {
53-
for (int i = 0; i < 32; i++) {
54-
Bytes rnd = Bytes.random(i);
52+
int leadingZeroHits = 0;
53+
int encodings = 0;
54+
for (int i = 0; i < 64; i++) {
55+
Bytes rnd = Bytes.random(i % 256);
5556
System.out.println("\n\nNEW TEST: " + i + " bytes\n");
56-
for (int j = 16; j < 36; j++) {
57-
BinaryToTextEncoding.EncoderDecoder encoding = new BinaryToTextEncoding.BaseRadix(j);
57+
for (int j = 2; j <= 36; j++) {
58+
encodings++;
59+
BinaryToTextEncoding.EncoderDecoder encoding = new BinaryToTextEncoding.BaseRadixNumber(j);
5860
String encodedBigEndian = encoding.encode(rnd.array(), ByteOrder.BIG_ENDIAN);
5961
byte[] decoded = encoding.decode(encodedBigEndian);
6062
System.out.println("radix" + j + ":\t" + encodedBigEndian);
6163
System.out.println("orig :\t" + rnd.encodeHex());
6264
System.out.println("enc :\t" + Bytes.wrap(decoded).encodeHex());
63-
assertArrayEquals(rnd.array(), decoded);
65+
66+
67+
if (rnd.length() <= 0 || rnd.byteAt(0) != 0) {
68+
assertArrayEquals(rnd.array(), decoded);
69+
} else { //since this is a number, we allow different lengths due to leading zero
70+
leadingZeroHits++;
71+
assertArrayEquals(rnd.resize(rnd.length() - 1).array(), decoded);
72+
}
6473
}
6574
}
75+
System.out.println(leadingZeroHits + " leading zero mismatches of " + encodings + " encodings");
6676
}
6777

6878
@Test
69-
@Ignore("should fix")
7079
public void encodeDecodeRadixZeros() {
7180
Bytes bytes = Bytes.wrap(new byte[]{0, 0, 0, 0});
72-
BinaryToTextEncoding.EncoderDecoder encoding = new BinaryToTextEncoding.BaseRadix(36);
81+
BinaryToTextEncoding.EncoderDecoder encoding = new BinaryToTextEncoding.BaseRadixNumber(36);
7382
String encodedBigEndian = encoding.encode(bytes.array(), ByteOrder.BIG_ENDIAN);
7483
byte[] decoded = encoding.decode(encodedBigEndian);
7584

7685
System.out.println("radix36:\t" + encodedBigEndian);
7786
System.out.println("orig :\t" + bytes.encodeHex());
7887
System.out.println("enc :\t" + Bytes.wrap(decoded).encodeHex());
79-
assertArrayEquals(bytes.array(), decoded);
88+
assertArrayEquals(new byte[]{}, decoded);
8089
}
8190

8291
@Test
@@ -114,12 +123,12 @@ public void encodeDecodeHex() {
114123

115124
@Test(expected = IllegalArgumentException.class)
116125
public void decodeInvalidRadix16() {
117-
new BinaryToTextEncoding.BaseRadix(16).decode("AAI=");
126+
new BinaryToTextEncoding.BaseRadixNumber(16).decode("AAI=");
118127
}
119128

120129
@Test(expected = IllegalArgumentException.class)
121130
public void decodeInvalidRadix36() {
122-
new BinaryToTextEncoding.BaseRadix(36).decode("AAI=");
131+
new BinaryToTextEncoding.BaseRadixNumber(36).decode("AAI=");
123132
}
124133

125134
@Test
@@ -139,4 +148,24 @@ public void decodeInvalidBase64() {
139148
public void decodeHalfInvalidBase64() {
140149
new BinaryToTextEncoding.Base64Encoding().decode("EAI`");
141150
}
151+
152+
@Test(expected = IllegalArgumentException.class)
153+
public void encodeRadixIllegalTooHigh2() {
154+
new BinaryToTextEncoding.BaseRadixNumber(38);
155+
}
156+
157+
@Test(expected = IllegalArgumentException.class)
158+
public void encodeRadixIllegalTooHigh() {
159+
new BinaryToTextEncoding.BaseRadixNumber(37);
160+
}
161+
162+
@Test(expected = IllegalArgumentException.class)
163+
public void encodeRadixIllegalTooLow() {
164+
new BinaryToTextEncoding.BaseRadixNumber(1);
165+
}
166+
167+
@Test(expected = IllegalArgumentException.class)
168+
public void encodeRadixIllegalTooLow2() {
169+
new BinaryToTextEncoding.BaseRadixNumber(0);
170+
}
142171
}

src/test/java/at/favre/lib/bytes/BytesParseAndEncodingTest.java

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,4 +125,42 @@ public void encodeBase36() {
125125
assertEquals("69zbn", Bytes.from(defaultArray).encodeBase36());
126126
assertEquals("5qpdvuwjvu5", Bytes.from(encodingExample).encodeBase36());
127127
}
128-
}
128+
129+
@Test
130+
public void parseRadix() {
131+
assertArrayEquals(encodingExample, Bytes.parseRadix("1001010100101001111110111111111000111101010111111101101", 2).array());
132+
assertArrayEquals(encodingExample, Bytes.parseRadix("10202221221221000222101012210121012", 3).array());
133+
assertArrayEquals(encodingExample, Bytes.parseRadix("1022211033313333013222333231", 4).array());
134+
assertArrayEquals(encodingExample, Bytes.parseRadix("134003042232210013121402", 5).array());
135+
assertArrayEquals(encodingExample, Bytes.parseRadix("542412151505231515005", 6).array());
136+
assertArrayEquals(encodingExample, Bytes.parseRadix("1124517677707527755", 8).array());
137+
assertArrayEquals(encodingExample, Bytes.parseRadix("20992966904426477", 10).array());
138+
assertArrayEquals(encodingExample, Bytes.parseRadix("4a94fdff1eafed", 16).array());
139+
assertArrayEquals(encodingExample, Bytes.parseRadix("5iibpp5dgpgp", 26).array());
140+
assertArrayEquals(encodingExample, Bytes.parseRadix("5qpdvuwjvu5", 36).array());
141+
}
142+
143+
@Test
144+
public void encodeRadix() {
145+
assertEquals("1001010100101001111110111111111000111101010111111101101", Bytes.from(encodingExample).encodeRadix(2));
146+
assertEquals("10202221221221000222101012210121012", Bytes.from(encodingExample).encodeRadix(3));
147+
assertEquals("1022211033313333013222333231", Bytes.from(encodingExample).encodeRadix(4));
148+
assertEquals("134003042232210013121402", Bytes.from(encodingExample).encodeRadix(5));
149+
assertEquals("542412151505231515005", Bytes.from(encodingExample).encodeRadix(6));
150+
assertEquals("1124517677707527755", Bytes.from(encodingExample).encodeRadix(8));
151+
assertEquals("20992966904426477", Bytes.from(encodingExample).encodeRadix(10));
152+
assertEquals("4a94fdff1eafed", Bytes.from(encodingExample).encodeRadix(16));
153+
assertEquals("5iibpp5dgpgp", Bytes.from(encodingExample).encodeRadix(26));
154+
assertEquals("5qpdvuwjvu5", Bytes.from(encodingExample).encodeRadix(36));
155+
}
156+
157+
@Test(expected = IllegalArgumentException.class)
158+
public void encodeRadixIllegalTooHigh() {
159+
Bytes.from(encodingExample).encodeRadix(37);
160+
}
161+
162+
@Test(expected = IllegalArgumentException.class)
163+
public void encodeRadixIllegalTooLow() {
164+
Bytes.from(encodingExample).encodeRadix(1);
165+
}
166+
}

0 commit comments

Comments
 (0)