Skip to content

Commit 7c7dced

Browse files
authored
Merge pull request #22 from patrickfav/feat-21-base32
Implement Base32 encoding
2 parents 3127402 + f88de49 commit 7c7dced

File tree

8 files changed

+481
-33
lines changed

8 files changed

+481
-33
lines changed

CHANGELOG

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@
33
## v0.8.0
44

55
* add radix encoding/parsing and fix radix tests #6, #20
6+
* add support for Base32 RFC4648 non-hex alphabet encoding/parsing #21
7+
8+
### Deprecations (will be removed in v1.0+)
9+
10+
* `parseBase36()`/`encodeBase36()` - use `parseRadix(36)`/`encodeRadix(36)` instead
611

712
## v0.7.1
813

@@ -29,6 +34,10 @@
2934
* add constructor/converter from/to UUID #9
3035
* add `empty()` constructor, creating empty byte array
3136

37+
### Deprecations (will be removed in v1.0+)
38+
39+
* `toObjectArray()` renamed to `toBoxedArray()`
40+
3241
## v0.5.0
3342

3443
* better resource handling for compression

README.md

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ It's main features include:
3535
* **Creation** from a wide variety of sources: multiple arrays, integers, [streams](https://docs.oracle.com/javase/7/docs/api/java/io/InputStream.html), random, strings, files, uuid, ...
3636
* **Transformation** with many built-in: append, [xor](https://en.wikipedia.org/wiki/Exclusive_or), [and](https://en.wikipedia.org/wiki/Logical_conjunction), [hash](https://en.wikipedia.org/wiki/Cryptographic_hash_function), [shifts](https://en.wikipedia.org/wiki/Bitwise_operation#Bit_shifts), shuffle, reverse, [checksum](https://en.wikipedia.org/wiki/Checksum), ...
3737
* **Validators** with the ability to arbitrarily combine multiple ones with logical expressions
38-
* **Parsing and Encoding** in most common binary-to-text-encodings: [hex](https://en.wikipedia.org/wiki/Hexadecimal), [base36](https://en.wikipedia.org/wiki/Base36), [base64](https://en.wikipedia.org/wiki/Base64), ...
38+
* **Parsing and Encoding** in most common binary-to-text-encodings: [hex](https://en.wikipedia.org/wiki/Hexadecimal), [base32](https://en.wikipedia.org/wiki/Base32), [base64](https://en.wikipedia.org/wiki/Base64), ...
3939
* **Immutable, Mutable and Read-Only** versions
4040
* **Handling Strings** with encoding and normalizing strings for arbitrary charset
4141
* **Utility Features** like `indexOf`, `count`, `isEmpty`, `bitAt`, `contains` ...
@@ -312,7 +312,14 @@ Bytes.from(array).encodeBase64(); //"SpT9/x6v7Q=="
312312
Bytes.from(array).encodeBase64Url(); //"SpT9_x6v7Q=="
313313
```
314314

315-
Additionally the following encodings are supported:
315+
There is also a **Base32** encoder (using the RFC 4648 non-hex alphabet):
316+
317+
```java
318+
Bytes.parseBase32("MZXQ====");
319+
Bytes.from(array).encodeBase32();
320+
```
321+
322+
Additionally, the following radix encodings are supported:
316323

317324
```java
318325
Bytes.from(array).encodeBinary(); //1110110110101111

pom.xml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,18 @@
215215
<version>4.12</version>
216216
<scope>test</scope>
217217
</dependency>
218+
<dependency>
219+
<groupId>org.openjdk.jmh</groupId>
220+
<artifactId>jmh-core</artifactId>
221+
<version>1.21</version>
222+
<scope>test</scope>
223+
</dependency>
224+
<dependency>
225+
<groupId>org.openjdk.jmh</groupId>
226+
<artifactId>jmh-generator-annprocess</artifactId>
227+
<version>1.21</version>
228+
<scope>test</scope>
229+
</dependency>
218230
</dependencies>
219231
<developers>
220232
<developer>
Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
/*
2+
* Copyright 2018 Patrick Favre-Bulle
3+
*
4+
* Licensed to the Apache Software Foundation (ASF) under one
5+
* or more contributor license agreements. See the NOTICE file
6+
* distributed with this work for additional information
7+
* regarding copyright ownership. The ASF licenses this file
8+
* to you under the Apache License, Version 2.0 (the
9+
* "License"); you may not use this file except in compliance
10+
* with the License. You may obtain a copy of the License at
11+
*
12+
* http://www.apache.org/licenses/LICENSE-2.0
13+
*
14+
* Unless required by applicable law or agreed to in writing,
15+
* software distributed under the License is distributed on an
16+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17+
* KIND, either express or implied. See the License for the
18+
* specific language governing permissions and limitations
19+
* under the License.
20+
*/
21+
22+
package at.favre.lib.bytes;
23+
24+
import java.io.IOException;
25+
import java.nio.ByteOrder;
26+
import java.util.Arrays;
27+
import java.util.Objects;
28+
29+
/**
30+
* Encoder which supports arbitrary alphabet and padding.
31+
*
32+
* Derived from Google Guava's common/io/ BaseEncoding
33+
* <p>
34+
* See: https://github.com/google/guava/blob/v26.0/guava/src/com/google/common/io/BaseEncoding.java
35+
*/
36+
final class BaseEncoding implements BinaryToTextEncoding.EncoderDecoder {
37+
private static final char ASCII_MAX = 127;
38+
39+
static final Alphabet BASE32_RFC4848 = new Alphabet("ABCDEFGHIJKLMNOPQRSTUVWXYZ234567".toCharArray());
40+
static final char BASE32_RFC4848_PADDING = '=';
41+
42+
private final Alphabet alphabet;
43+
private final Character paddingChar;
44+
45+
BaseEncoding(Alphabet alphabet, Character paddingChar) {
46+
this.alphabet = Objects.requireNonNull(alphabet);
47+
this.paddingChar = paddingChar;
48+
}
49+
50+
private int maxEncodedSize(int bytes) {
51+
return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk);
52+
}
53+
54+
@Override
55+
public String encode(byte[] array, ByteOrder byteOrder) {
56+
return encode(array, 0, array.length);
57+
}
58+
59+
private String encode(byte[] bytes, int off, int len) {
60+
StringBuilder result = new StringBuilder(maxEncodedSize(len));
61+
try {
62+
encodeTo(result, bytes, off, len);
63+
} catch (IOException impossible) {
64+
throw new AssertionError(impossible);
65+
}
66+
return result.toString();
67+
}
68+
69+
private void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
70+
Objects.requireNonNull(target);
71+
for (int i = 0; i < len; i += alphabet.bytesPerChunk) {
72+
encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i));
73+
}
74+
}
75+
76+
private void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
77+
Objects.requireNonNull(target);
78+
long bitBuffer = 0;
79+
for (int i = 0; i < len; ++i) {
80+
bitBuffer |= bytes[off + i] & 0xFF;
81+
bitBuffer <<= 8; // Add additional zero byte in the end.
82+
}
83+
// Position of first character is length of bitBuffer minus bitsPerChar.
84+
final int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar;
85+
int bitsProcessed = 0;
86+
while (bitsProcessed < len * 8) {
87+
int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask;
88+
target.append(alphabet.encode(charIndex));
89+
bitsProcessed += alphabet.bitsPerChar;
90+
}
91+
if (paddingChar != null) {
92+
while (bitsProcessed < alphabet.bytesPerChunk * 8) {
93+
target.append(paddingChar);
94+
bitsProcessed += alphabet.bitsPerChar;
95+
}
96+
}
97+
}
98+
99+
private int maxDecodedSize(int chars) {
100+
return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
101+
}
102+
103+
private String trimTrailingPadding(CharSequence chars) {
104+
Objects.requireNonNull(chars);
105+
if (paddingChar == null) {
106+
return chars.toString();
107+
}
108+
int l;
109+
for (l = chars.length() - 1; l >= 0; l--) {
110+
if (chars.charAt(l) != paddingChar) {
111+
break;
112+
}
113+
}
114+
return chars.subSequence(0, l + 1).toString();
115+
}
116+
117+
@Override
118+
public byte[] decode(String encoded) {
119+
encoded = trimTrailingPadding(encoded);
120+
byte[] tmp = new byte[maxDecodedSize(encoded.length())];
121+
int len = decodeTo(tmp, encoded);
122+
return extract(tmp, len);
123+
}
124+
125+
private static byte[] extract(byte[] result, int length) {
126+
if (length == result.length) {
127+
return result;
128+
} else {
129+
byte[] trunc = new byte[length];
130+
System.arraycopy(result, 0, trunc, 0, length);
131+
return trunc;
132+
}
133+
}
134+
135+
private int decodeTo(byte[] target, CharSequence chars) {
136+
Objects.requireNonNull(target);
137+
chars = trimTrailingPadding(chars);
138+
int bytesWritten = 0;
139+
for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) {
140+
long chunk = 0;
141+
int charsProcessed = 0;
142+
for (int i = 0; i < alphabet.charsPerChunk; i++) {
143+
chunk <<= alphabet.bitsPerChar;
144+
if (charIdx + i < chars.length()) {
145+
chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++));
146+
}
147+
}
148+
final int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar;
149+
for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) {
150+
target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF);
151+
}
152+
}
153+
return bytesWritten;
154+
}
155+
156+
static final class Alphabet {
157+
// this is meant to be immutable -- don't modify it!
158+
private final char[] chars;
159+
final int mask;
160+
final int bitsPerChar;
161+
final int charsPerChunk;
162+
final int bytesPerChunk;
163+
private final byte[] decodabet;
164+
165+
Alphabet(char[] chars) {
166+
this.chars = Objects.requireNonNull(chars);
167+
this.bitsPerChar = log2(chars.length);
168+
169+
/*
170+
* e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes
171+
* for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
172+
*/
173+
int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
174+
this.charsPerChunk = 8 / gcd;
175+
this.bytesPerChunk = bitsPerChar / gcd;
176+
this.mask = chars.length - 1;
177+
178+
byte[] decodabet = new byte[ASCII_MAX + 1];
179+
Arrays.fill(decodabet, (byte) -1);
180+
for (int i = 0; i < chars.length; i++) {
181+
char c = chars[i];
182+
decodabet[c] = (byte) i;
183+
}
184+
this.decodabet = decodabet;
185+
}
186+
187+
char encode(int bits) {
188+
return chars[bits];
189+
}
190+
191+
int decode(char ch) {
192+
return (int) decodabet[ch];
193+
}
194+
}
195+
196+
private static int divide(int p, int q) {
197+
int div = p / q;
198+
int rem = p - q * div; // equal to p % q
199+
200+
if (rem == 0) {
201+
return div;
202+
}
203+
int signum = 1 | ((p ^ q) >> (Integer.SIZE - 1));
204+
return signum > 0 ? div + signum : div;
205+
}
206+
207+
private static int log2(int x) {
208+
return (Integer.SIZE - 1) - Integer.numberOfLeadingZeros(x);
209+
}
210+
}

src/main/java/at/favre/lib/bytes/Bytes.java

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,7 @@ public static Bytes wrap(byte[] array) {
156156
* @return new instance
157157
*/
158158
public static Bytes wrap(byte[] array, ByteOrder byteOrder) {
159-
Objects.requireNonNull(array, "passed array must not be null");
160-
return new Bytes(array, byteOrder);
159+
return new Bytes(Objects.requireNonNull(array, "passed array must not be null"), byteOrder);
161160
}
162161

163162
/**
@@ -168,8 +167,7 @@ public static Bytes wrap(byte[] array, ByteOrder byteOrder) {
168167
* @return new instance
169168
*/
170169
public static Bytes from(byte[] byteArrayToCopy) {
171-
Objects.requireNonNull(byteArrayToCopy, "must at least pass a single byte");
172-
return wrap(Arrays.copyOf(byteArrayToCopy, byteArrayToCopy.length));
170+
return wrap(Arrays.copyOf(Objects.requireNonNull(byteArrayToCopy, "must at least pass a single byte"), byteArrayToCopy.length));
173171
}
174172

175173
/**
@@ -318,8 +316,7 @@ public static Bytes from(int integer4byte) {
318316
* @return new instance
319317
*/
320318
public static Bytes from(int... intArray) {
321-
Objects.requireNonNull(intArray, "must provide at least a single int");
322-
return wrap(Util.toByteArray(intArray));
319+
return wrap(Util.toByteArray(Objects.requireNonNull(intArray, "must provide at least a single int")));
323320
}
324321

325322
/**
@@ -339,8 +336,7 @@ public static Bytes from(long long8byte) {
339336
* @return new instance
340337
*/
341338
public static Bytes from(long... longArray) {
342-
Objects.requireNonNull(longArray, "must provide at least a single long");
343-
return wrap(Util.toByteArray(longArray));
339+
return wrap(Util.toByteArray(Objects.requireNonNull(longArray, "must provide at least a single long")));
344340
}
345341

346342
/**
@@ -470,9 +466,7 @@ public static Bytes from(CharSequence utf8String, Normalizer.Form form) {
470466
* @return new instance
471467
*/
472468
public static Bytes from(CharSequence string, Charset charset) {
473-
Objects.requireNonNull(string, "provided string must not be null");
474-
Objects.requireNonNull(charset, "provided charset must not be null");
475-
return wrap(string.toString().getBytes(charset));
469+
return wrap(Objects.requireNonNull(string, "provided string must not be null").toString().getBytes(Objects.requireNonNull(charset, "provided charset must not be null")));
476470
}
477471

478472
/**
@@ -567,6 +561,18 @@ public static Bytes parseHex(String hexString) {
567561
return parse(hexString, new BinaryToTextEncoding.Hex());
568562
}
569563

564+
/**
565+
* Parsing of base32/RFC 4648 encoded byte arrays.
566+
* <p>
567+
* Uses the RFC 4648 non-hex alphabet, see <a href="https://en.wikipedia.org/wiki/Base32#RFC_4648_Base32_alphabet">Base32 alphabet</a>.
568+
*
569+
* @param base32Rfc4648String the encoded string
570+
* @return decoded instance
571+
*/
572+
public static Bytes parseBase32(String base32Rfc4648String) {
573+
return parse(base32Rfc4648String, new BaseEncoding(BaseEncoding.BASE32_RFC4848, BaseEncoding.BASE32_RFC4848_PADDING));
574+
}
575+
570576
/**
571577
* Parsing of base36 encoded byte arrays.
572578
* <p>
@@ -1538,6 +1544,19 @@ public String encodeHex(boolean upperCase) {
15381544
return encode(new BinaryToTextEncoding.Hex(upperCase));
15391545
}
15401546

1547+
/**
1548+
* Base32 RFC4648 string representation of the internal byte array (not Base32 hex alphabet extension)
1549+
* <p>
1550+
* Example: <code>MZXW6YQ=</code>
1551+
* <p>
1552+
* See <a href="https://tools.ietf.org/html/rfc4648">RFC 4648</a>
1553+
*
1554+
* @return base32 string
1555+
*/
1556+
public String encodeBase32() {
1557+
return encode(new BaseEncoding(BaseEncoding.BASE32_RFC4848, BaseEncoding.BASE32_RFC4848_PADDING));
1558+
}
1559+
15411560
/**
15421561
* DO NOT USE AS DATA ENCODING, ONLY FOR NUMBERS!
15431562
* <p>
@@ -1597,8 +1616,7 @@ public String encodeUtf8() {
15971616
* @return encoded string
15981617
*/
15991618
public String encodeCharset(Charset charset) {
1600-
Objects.requireNonNull(charset, "given charset must not be null");
1601-
return new String(internalArray(), charset);
1619+
return new String(internalArray(), Objects.requireNonNull(charset, "given charset must not be null"));
16021620
}
16031621

16041622
/**

0 commit comments

Comments
 (0)