Skip to content

Commit b21ed76

Browse files
committed
Implement Base32 encoding powered by Guava's BaseEncoding
fixes #21
1 parent 3127402 commit b21ed76

File tree

6 files changed

+360
-33
lines changed

6 files changed

+360
-33
lines changed

CHANGELOG

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
## v0.8.0
44

55
* add radix encoding/parsing and fix radix tests #6, #20
6+
* add support for Base32 RFC4648 non-hex alphabet encoding/parsing #21
67

78
## v0.7.1
89

README.md

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ It's main features include:
3535
* **Creation** from a wide variety of sources: multiple arrays, integers, [streams](https://docs.oracle.com/javase/7/docs/api/java/io/InputStream.html), random, strings, files, uuid, ...
3636
* **Transformation** with many built-in: append, [xor](https://en.wikipedia.org/wiki/Exclusive_or), [and](https://en.wikipedia.org/wiki/Logical_conjunction), [hash](https://en.wikipedia.org/wiki/Cryptographic_hash_function), [shifts](https://en.wikipedia.org/wiki/Bitwise_operation#Bit_shifts), shuffle, reverse, [checksum](https://en.wikipedia.org/wiki/Checksum), ...
3737
* **Validators** with the ability to arbitrarily combine multiple ones with logical expressions
38-
* **Parsing and Encoding** in most common binary-to-text-encodings: [hex](https://en.wikipedia.org/wiki/Hexadecimal), [base36](https://en.wikipedia.org/wiki/Base36), [base64](https://en.wikipedia.org/wiki/Base64), ...
38+
* **Parsing and Encoding** in most common binary-to-text-encodings: [hex](https://en.wikipedia.org/wiki/Hexadecimal), [base32](https://en.wikipedia.org/wiki/Base32), [base64](https://en.wikipedia.org/wiki/Base64), ...
3939
* **Immutable, Mutable and Read-Only** versions
4040
* **Handling Strings** with encoding and normalizing strings for arbitrary charset
4141
* **Utility Features** like `indexOf`, `count`, `isEmpty`, `bitAt`, `contains` ...
@@ -312,7 +312,14 @@ Bytes.from(array).encodeBase64(); //"SpT9/x6v7Q=="
312312
Bytes.from(array).encodeBase64Url(); //"SpT9_x6v7Q=="
313313
```
314314

315-
Additionally the following encodings are supported:
315+
There is also a **Base32** encoder (using the RFC 4648 non-hex alphabet):
316+
317+
```java
318+
Bytes.parseBase32("MZXQ====");
319+
Bytes.from(array).encodeBase32();
320+
```
321+
322+
Additionally the following radix encodings are supported:
316323

317324
```java
318325
Bytes.from(array).encodeBinary(); //1110110110101111
Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
/*
2+
* Copyright 2018 Patrick Favre-Bulle
3+
*
4+
* Licensed to the Apache Software Foundation (ASF) under one
5+
* or more contributor license agreements. See the NOTICE file
6+
* distributed with this work for additional information
7+
* regarding copyright ownership. The ASF licenses this file
8+
* to you under the Apache License, Version 2.0 (the
9+
* "License"); you may not use this file except in compliance
10+
* with the License. You may obtain a copy of the License at
11+
*
12+
* http://www.apache.org/licenses/LICENSE-2.0
13+
*
14+
* Unless required by applicable law or agreed to in writing,
15+
* software distributed under the License is distributed on an
16+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17+
* KIND, either express or implied. See the License for the
18+
* specific language governing permissions and limitations
19+
* under the License.
20+
*/
21+
22+
package at.favre.lib.bytes;
23+
24+
import java.io.IOException;
25+
import java.nio.ByteOrder;
26+
import java.util.Arrays;
27+
import java.util.Objects;
28+
29+
/**
30+
* Derived from Google Guava's common/io/ BaseEncoding
31+
* <p>
32+
* See: https://github.com/google/guava/blob/v26.0/guava/src/com/google/common/io/BaseEncoding.java
33+
*/
34+
final class BaseEncoding implements BinaryToTextEncoding.EncoderDecoder {
35+
private static final char ASCII_MAX = 127;
36+
37+
static final Alphabet BASE32_RFC4848 = new Alphabet("ABCDEFGHIJKLMNOPQRSTUVWXYZ234567".toCharArray());
38+
static final char BASE32_RFC4848_PADDING = '=';
39+
40+
private final Alphabet alphabet;
41+
private final Character paddingChar;
42+
43+
BaseEncoding(Alphabet alphabet, Character paddingChar) {
44+
this.alphabet = Objects.requireNonNull(alphabet);
45+
this.paddingChar = paddingChar;
46+
}
47+
48+
private int maxEncodedSize(int bytes) {
49+
return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk);
50+
}
51+
52+
@Override
53+
public String encode(byte[] array, ByteOrder byteOrder) {
54+
return encode(array, 0, array.length);
55+
}
56+
57+
private String encode(byte[] bytes, int off, int len) {
58+
StringBuilder result = new StringBuilder(maxEncodedSize(len));
59+
try {
60+
encodeTo(result, bytes, off, len);
61+
} catch (IOException impossible) {
62+
throw new AssertionError(impossible);
63+
}
64+
return result.toString();
65+
}
66+
67+
private void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
68+
Objects.requireNonNull(target);
69+
for (int i = 0; i < len; i += alphabet.bytesPerChunk) {
70+
encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i));
71+
}
72+
}
73+
74+
private void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
75+
Objects.requireNonNull(target);
76+
long bitBuffer = 0;
77+
for (int i = 0; i < len; ++i) {
78+
bitBuffer |= bytes[off + i] & 0xFF;
79+
bitBuffer <<= 8; // Add additional zero byte in the end.
80+
}
81+
// Position of first character is length of bitBuffer minus bitsPerChar.
82+
final int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar;
83+
int bitsProcessed = 0;
84+
while (bitsProcessed < len * 8) {
85+
int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask;
86+
target.append(alphabet.encode(charIndex));
87+
bitsProcessed += alphabet.bitsPerChar;
88+
}
89+
if (paddingChar != null) {
90+
while (bitsProcessed < alphabet.bytesPerChunk * 8) {
91+
target.append(paddingChar);
92+
bitsProcessed += alphabet.bitsPerChar;
93+
}
94+
}
95+
}
96+
97+
private int maxDecodedSize(int chars) {
98+
return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
99+
}
100+
101+
private String trimTrailingPadding(CharSequence chars) {
102+
Objects.requireNonNull(chars);
103+
if (paddingChar == null) {
104+
return chars.toString();
105+
}
106+
int l;
107+
for (l = chars.length() - 1; l >= 0; l--) {
108+
if (chars.charAt(l) != paddingChar) {
109+
break;
110+
}
111+
}
112+
return chars.subSequence(0, l + 1).toString();
113+
}
114+
115+
@Override
116+
public byte[] decode(String encoded) {
117+
encoded = trimTrailingPadding(encoded);
118+
byte[] tmp = new byte[maxDecodedSize(encoded.length())];
119+
int len = decodeTo(tmp, encoded);
120+
return extract(tmp, len);
121+
}
122+
123+
private static byte[] extract(byte[] result, int length) {
124+
if (length == result.length) {
125+
return result;
126+
} else {
127+
byte[] trunc = new byte[length];
128+
System.arraycopy(result, 0, trunc, 0, length);
129+
return trunc;
130+
}
131+
}
132+
133+
private int decodeTo(byte[] target, CharSequence chars) {
134+
Objects.requireNonNull(target);
135+
chars = trimTrailingPadding(chars);
136+
int bytesWritten = 0;
137+
for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) {
138+
long chunk = 0;
139+
int charsProcessed = 0;
140+
for (int i = 0; i < alphabet.charsPerChunk; i++) {
141+
chunk <<= alphabet.bitsPerChar;
142+
if (charIdx + i < chars.length()) {
143+
chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++));
144+
}
145+
}
146+
final int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar;
147+
for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) {
148+
target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF);
149+
}
150+
}
151+
return bytesWritten;
152+
}
153+
154+
private static final class Alphabet {
155+
// this is meant to be immutable -- don't modify it!
156+
private final char[] chars;
157+
final int mask;
158+
final int bitsPerChar;
159+
final int charsPerChunk;
160+
final int bytesPerChunk;
161+
private final byte[] decodabet;
162+
163+
Alphabet(char[] chars) {
164+
this.chars = Objects.requireNonNull(chars);
165+
this.bitsPerChar = log2(chars.length);
166+
167+
/*
168+
* e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes
169+
* for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
170+
*/
171+
int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
172+
this.charsPerChunk = 8 / gcd;
173+
this.bytesPerChunk = bitsPerChar / gcd;
174+
this.mask = chars.length - 1;
175+
176+
byte[] decodabet = new byte[ASCII_MAX + 1];
177+
Arrays.fill(decodabet, (byte) -1);
178+
for (int i = 0; i < chars.length; i++) {
179+
char c = chars[i];
180+
decodabet[c] = (byte) i;
181+
}
182+
this.decodabet = decodabet;
183+
}
184+
185+
char encode(int bits) {
186+
return chars[bits];
187+
}
188+
189+
int decode(char ch) {
190+
return (int) decodabet[ch];
191+
}
192+
}
193+
194+
private static int divide(int p, int q) {
195+
int div = p / q;
196+
int rem = p - q * div; // equal to p % q
197+
198+
if (rem == 0) {
199+
return div;
200+
}
201+
int signum = 1 | ((p ^ q) >> (Integer.SIZE - 1));
202+
return signum > 0 ? div + signum : div;
203+
}
204+
205+
private static int log2(int x) {
206+
return (Integer.SIZE - 1) - Integer.numberOfLeadingZeros(x);
207+
}
208+
}

src/main/java/at/favre/lib/bytes/Bytes.java

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,7 @@ public static Bytes wrap(byte[] array) {
156156
* @return new instance
157157
*/
158158
public static Bytes wrap(byte[] array, ByteOrder byteOrder) {
159-
Objects.requireNonNull(array, "passed array must not be null");
160-
return new Bytes(array, byteOrder);
159+
return new Bytes(Objects.requireNonNull(array, "passed array must not be null"), byteOrder);
161160
}
162161

163162
/**
@@ -168,8 +167,7 @@ public static Bytes wrap(byte[] array, ByteOrder byteOrder) {
168167
* @return new instance
169168
*/
170169
public static Bytes from(byte[] byteArrayToCopy) {
171-
Objects.requireNonNull(byteArrayToCopy, "must at least pass a single byte");
172-
return wrap(Arrays.copyOf(byteArrayToCopy, byteArrayToCopy.length));
170+
return wrap(Arrays.copyOf(Objects.requireNonNull(byteArrayToCopy, "must at least pass a single byte"), byteArrayToCopy.length));
173171
}
174172

175173
/**
@@ -318,8 +316,7 @@ public static Bytes from(int integer4byte) {
318316
* @return new instance
319317
*/
320318
public static Bytes from(int... intArray) {
321-
Objects.requireNonNull(intArray, "must provide at least a single int");
322-
return wrap(Util.toByteArray(intArray));
319+
return wrap(Util.toByteArray(Objects.requireNonNull(intArray, "must provide at least a single int")));
323320
}
324321

325322
/**
@@ -339,8 +336,7 @@ public static Bytes from(long long8byte) {
339336
* @return new instance
340337
*/
341338
public static Bytes from(long... longArray) {
342-
Objects.requireNonNull(longArray, "must provide at least a single long");
343-
return wrap(Util.toByteArray(longArray));
339+
return wrap(Util.toByteArray(Objects.requireNonNull(longArray, "must provide at least a single long")));
344340
}
345341

346342
/**
@@ -470,9 +466,7 @@ public static Bytes from(CharSequence utf8String, Normalizer.Form form) {
470466
* @return new instance
471467
*/
472468
public static Bytes from(CharSequence string, Charset charset) {
473-
Objects.requireNonNull(string, "provided string must not be null");
474-
Objects.requireNonNull(charset, "provided charset must not be null");
475-
return wrap(string.toString().getBytes(charset));
469+
return wrap(Objects.requireNonNull(string, "provided string must not be null").toString().getBytes(Objects.requireNonNull(charset, "provided charset must not be null")));
476470
}
477471

478472
/**
@@ -567,6 +561,18 @@ public static Bytes parseHex(String hexString) {
567561
return parse(hexString, new BinaryToTextEncoding.Hex());
568562
}
569563

564+
/**
565+
* Parses a Base32 (RFC 4648) encoded string into a new instance.
566+
* <p>
567+
* Uses the RFC 4648 non-hex alphabet, see <a href="https://en.wikipedia.org/wiki/Base32#RFC_4648_Base32_alphabet">Base32 alphabet</a>.
* The decoder is configured with '=' as padding character.
* <p>
* Example input: <code>MZXQ====</code>
568+
*
569+
* @param base32Rfc4648String the encoded string
570+
* @return decoded instance
571+
*/
572+
public static Bytes parseBase32(String base32Rfc4648String) {
573+
return parse(base32Rfc4648String, new BaseEncoding(BaseEncoding.BASE32_RFC4848, BaseEncoding.BASE32_RFC4848_PADDING));
574+
}
575+
570576
/**
571577
* Parsing of base36 encoded byte arrays.
572578
* <p>
@@ -1538,6 +1544,19 @@ public String encodeHex(boolean upperCase) {
15381544
return encode(new BinaryToTextEncoding.Hex(upperCase));
15391545
}
15401546

1547+
/**
1548+
* Base32 RFC4648 string representation of the internal byte array (not Base32 hex alphabet extension)
* <p>
* The encoder is configured with the alphabet <code>A-Z</code>, <code>2-7</code> and '=' as padding character.
1549+
* <p>
1550+
* Example: <code>MZXW6YQ=</code>
1551+
* <p>
1552+
* See <a href="https://tools.ietf.org/html/rfc4648">RFC 4648</a>
1553+
*
1554+
* @return base32 string
1555+
*/
1556+
public String encodeBase32() {
1557+
return encode(new BaseEncoding(BaseEncoding.BASE32_RFC4848, BaseEncoding.BASE32_RFC4848_PADDING));
1558+
}
1559+
15411560
/**
15421561
* DO NOT USE AS DATA ENCODING, ONLY FOR NUMBERS!
15431562
* <p>
@@ -1597,8 +1616,7 @@ public String encodeUtf8() {
15971616
* @return encoded string
15981617
*/
15991618
public String encodeCharset(Charset charset) {
1600-
Objects.requireNonNull(charset, "given charset must not be null");
1601-
return new String(internalArray(), charset);
1619+
return new String(internalArray(), Objects.requireNonNull(charset, "given charset must not be null"));
16021620
}
16031621

16041622
/**

0 commit comments

Comments
 (0)