diff --git a/.classpath b/.classpath
deleted file mode 100644
index 534b5e5..0000000
--- a/.classpath
+++ /dev/null
@@ -1,36 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/.gitignore b/.gitignore
index e43e9a3..4359056 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,10 @@
*.war
*.ear
+.classpath
+.settings
+.project
+
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
/target
diff --git a/.project b/.project
deleted file mode 100644
index 1202499..0000000
--- a/.project
+++ /dev/null
@@ -1,23 +0,0 @@
-
-
- encoding-test
-
-
-
-
-
- org.eclipse.jdt.core.javabuilder
-
-
-
-
- org.eclipse.m2e.core.maven2Builder
-
-
-
-
-
- org.eclipse.jdt.core.javanature
- org.eclipse.m2e.core.maven2Nature
-
-
diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs
deleted file mode 100644
index 60105c1..0000000
--- a/.settings/org.eclipse.jdt.core.prefs
+++ /dev/null
@@ -1,5 +0,0 @@
-eclipse.preferences.version=1
-org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
-org.eclipse.jdt.core.compiler.compliance=1.6
-org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
-org.eclipse.jdt.core.compiler.source=1.6
diff --git a/.settings/org.eclipse.m2e.core.prefs b/.settings/org.eclipse.m2e.core.prefs
deleted file mode 100644
index f897a7f..0000000
--- a/.settings/org.eclipse.m2e.core.prefs
+++ /dev/null
@@ -1,4 +0,0 @@
-activeProfiles=
-eclipse.preferences.version=1
-resolveWorkspaceProjects=true
-version=1
diff --git a/README.md b/README.md
index 83697aa..8e73b04 100644
--- a/README.md
+++ b/README.md
@@ -5,119 +5,212 @@ Testing encoding size when forced to go through a UTF string (i.e. bytes -> UTF
Output:
```
-long direct: 800
-UTF-8 bytes of .toString(): 1900
-UTF-8 bytes of Base:16: 1600
-UTF-8 bytes of Base:17: 1600
-UTF-8 bytes of Base:18: 1600
-UTF-8 bytes of Base:19: 1500
-UTF-8 bytes of Base:20: 1500
-UTF-8 bytes of Base:21: 1500
-UTF-8 bytes of Base:22: 1500
-UTF-8 bytes of Base:23: 1400
-UTF-8 bytes of Base:24: 1400
-UTF-8 bytes of Base:25: 1400
-UTF-8 bytes of Base:26: 1400
-UTF-8 bytes of Base:27: 1400
-UTF-8 bytes of Base:28: 1400
-UTF-8 bytes of Base:29: 1300
-UTF-8 bytes of Base:30: 1300
-UTF-8 bytes of Base:31: 1300
-UTF-8 bytes of Base:32: 1300
-UTF-8 bytes of Base:33: 1300
-UTF-8 bytes of Base:34: 1300
-UTF-8 bytes of Base:35: 1300
-UTF-8 bytes of Base:36: 1300
-UTF-8 bytes of Base:37: 1300
-UTF-8 bytes of Base:38: 1300
-UTF-8 bytes of Base:39: 1200
-UTF-8 bytes of Base:40: 1200
-UTF-8 bytes of Base:41: 1200
-UTF-8 bytes of Base:42: 1200
-UTF-8 bytes of Base:43: 1200
-UTF-8 bytes of Base:44: 1200
-UTF-8 bytes of Base:45: 1200
-UTF-8 bytes of Base:46: 1200
-UTF-8 bytes of Base:47: 1200
-UTF-8 bytes of Base:48: 1200
-UTF-8 bytes of Base:49: 1200
-UTF-8 bytes of Base:50: 1200
-UTF-8 bytes of Base:51: 1200
-UTF-8 bytes of Base:52: 1200
-UTF-8 bytes of Base:53: 1100
-UTF-8 bytes of Base:54: 1100
-UTF-8 bytes of Base:55: 1100
-UTF-8 bytes of Base:56: 1100
-UTF-8 bytes of Base:57: 1100
-UTF-8 bytes of Base:58: 1100
-UTF-8 bytes of Base:59: 1100
-UTF-8 bytes of Base:60: 1100
-UTF-8 bytes of Base:61: 1100
-UTF-8 bytes of Base:62: 1100
-UTF-8 bytes of Base:63: 1100
-UTF-8 bytes of Base:64: 1100
-UTF-8 bytes of Base:65: 1100
-UTF-8 bytes of Base:66: 1100
-UTF-8 bytes of Base:67: 1100
-UTF-8 bytes of Base:68: 1100
-UTF-8 bytes of Base:69: 1100
-UTF-8 bytes of Base:70: 1100
-UTF-8 bytes of Base:71: 1100
-UTF-8 bytes of Base:72: 1100
-UTF-8 bytes of Base:73: 1100
-UTF-8 bytes of Base:74: 1100
-UTF-8 bytes of Base:75: 1100
-UTF-8 bytes of Base:76: 1100
-UTF-8 bytes of Base:77: 1100
-UTF-8 bytes of Base:78: 1100
-UTF-8 bytes of Base:79: 1000
-UTF-8 bytes of Base:80: 1000
-UTF-8 bytes of Base:81: 1000
-UTF-8 bytes of Base:82: 1000
-UTF-8 bytes of Base:83: 1000
-UTF-8 bytes of Base:84: 1000
-UTF-8 bytes of Base:85: 1000
-UTF-8 bytes of Base:86: 1000
-UTF-8 bytes of Base:87: 1000
-UTF-8 bytes of Base:88: 1000
-UTF-8 bytes of Base:89: 1000
-UTF-8 bytes of Base:90: 1000
-UTF-8 bytes of Base:91: 1000
-UTF-8 bytes of Base:92: 1000
-UTF-8 bytes of Base:93: 1000
-UTF-8 bytes of Base:94: 1000
-UTF-8 bytes of Base:95: 1000
-UTF-8 bytes of Base:96: 1000
-UTF-8 bytes of Base:97: 1000
-UTF-8 bytes of Base:98: 1000
-UTF-8 bytes of Base:99: 1000
-UTF-8 bytes of Base:100: 1000
-UTF-8 bytes of Base:101: 1000
-UTF-8 bytes of Base:102: 1000
-UTF-8 bytes of Base:103: 1000
-UTF-8 bytes of Base:104: 1000
-UTF-8 bytes of Base:105: 1000
-UTF-8 bytes of Base:106: 1000
-UTF-8 bytes of Base:107: 1000
-UTF-8 bytes of Base:108: 1000
-UTF-8 bytes of Base:109: 1000
-UTF-8 bytes of Base:110: 1000
-UTF-8 bytes of Base:111: 1000
-UTF-8 bytes of Base:112: 1000
-UTF-8 bytes of Base:113: 1000
-UTF-8 bytes of Base:114: 1000
-UTF-8 bytes of Base:115: 1000
-UTF-8 bytes of Base:116: 1000
-UTF-8 bytes of Base:117: 1000
-UTF-8 bytes of Base:118: 1000
-UTF-8 bytes of Base:119: 1000
-UTF-8 bytes of Base:120: 1000
-UTF-8 bytes of Base:121: 1000
-UTF-8 bytes of Base:122: 1000
-UTF-8 bytes of Base:123: 1000
-UTF-8 bytes of Base:124: 1000
-UTF-8 bytes of Base:125: 1000
-UTF-8 bytes of Base:126: 1000
-UTF-8 bytes of Base:127: 1000
-UTF-8 bytes of Base:128: 900
+Encoding: Bennight Base 2
+Average Encoded Bytes (2 original bytes): 17.0
+Average Encoded Bytes (3 original bytes): 25.0
+Average Encoded Bytes (4 original bytes): 33.0
+Average Encoded Bytes (5 original bytes): 41.0
+Average Encoded Bytes (6 original bytes): 49.0
+Average Encoded Bytes (7 original bytes): 57.0
+Average Encoded Bytes (8 original bytes): 65.0
+Total Successes: 71434240
+Success Rate: 1.0
+Time in encode: 0:08:44.604
+Time in decode: 0:23:39.100
+***************************
+
+
+Encoding: Bennight Base 4
+Average Encoded Bytes (2 original bytes): 9.0
+Average Encoded Bytes (3 original bytes): 13.0
+Average Encoded Bytes (4 original bytes): 17.0
+Average Encoded Bytes (5 original bytes): 21.0
+Average Encoded Bytes (6 original bytes): 25.0
+Average Encoded Bytes (7 original bytes): 29.0
+Average Encoded Bytes (8 original bytes): 33.0
+Total Successes: 71434240
+Success Rate: 1.0
+Time in encode: 0:04:31.915
+Time in decode: 0:10:23.793
+***************************
+
+
+Encoding: Bennight Base 8
+Average Encoded Bytes (2 original bytes): 6.0
+Average Encoded Bytes (3 original bytes): 9.0
+Average Encoded Bytes (4 original bytes): 11.0
+Average Encoded Bytes (5 original bytes): 14.0
+Average Encoded Bytes (6 original bytes): 17.0
+Average Encoded Bytes (7 original bytes): 19.0
+Average Encoded Bytes (8 original bytes): 22.0
+Total Successes: 71434240
+Success Rate: 1.0
+Time in encode: 0:03:05.284
+Time in decode: 0:06:25.902
+***************************
+
+
+Encoding: Bennight Base 16
+Average Encoded Bytes (2 original bytes): 5.0
+Average Encoded Bytes (3 original bytes): 7.0
+Average Encoded Bytes (4 original bytes): 9.0
+Average Encoded Bytes (5 original bytes): 11.0
+Average Encoded Bytes (6 original bytes): 13.0
+Average Encoded Bytes (7 original bytes): 15.0
+Average Encoded Bytes (8 original bytes): 17.0
+Total Successes: 71434240
+Success Rate: 1.0
+Time in encode: 0:02:28.658
+Time in decode: 0:04:47.536
+***************************
+
+
+Encoding: Bennight Base 32
+Average Encoded Bytes (2 original bytes): 4.0
+Average Encoded Bytes (3 original bytes): 5.0
+Average Encoded Bytes (4 original bytes): 7.0
+Average Encoded Bytes (5 original bytes): 9.0
+Average Encoded Bytes (6 original bytes): 10.0
+Average Encoded Bytes (7 original bytes): 12.0
+Average Encoded Bytes (8 original bytes): 13.0
+Total Successes: 71434240
+Success Rate: 1.0
+Time in encode: 0:02:00.650
+Time in decode: 0:03:49.458
+***************************
+
+
+Encoding: Bennight Base 64
+Average Encoded Bytes (2 original bytes): 3.0
+Average Encoded Bytes (3 original bytes): 5.0
+Average Encoded Bytes (4 original bytes): 6.0
+Average Encoded Bytes (5 original bytes): 7.0
+Average Encoded Bytes (6 original bytes): 9.0
+Average Encoded Bytes (7 original bytes): 10.0
+Average Encoded Bytes (8 original bytes): 11.0
+Total Successes: 71434240
+Success Rate: 1.0
+Time in encode: 0:01:42.823
+Time in decode: 0:03:08.459
+***************************
+
+
+Encoding: Bennight Base 85
+Average Encoded Bytes (2 original bytes): 3.0
+Average Encoded Bytes (3 original bytes): 4.0
+Average Encoded Bytes (4 original bytes): 5.9614105224609375
+Average Encoded Bytes (5 original bytes): 7.0
+Average Encoded Bytes (6 original bytes): 8.0
+Average Encoded Bytes (7 original bytes): 9.0
+Average Encoded Bytes (8 original bytes): 10.75
+Total Successes: 71434240
+Success Rate: 1.0
+Time in encode: 0:01:39.455
+Time in decode: 0:03:08.377
+***************************
+
+
+Encoding: Bennight Base 128
+Average Encoded Bytes (2 original bytes): 3.0
+Average Encoded Bytes (3 original bytes): 4.0
+Average Encoded Bytes (4 original bytes): 5.0
+Average Encoded Bytes (5 original bytes): 6.0
+Average Encoded Bytes (6 original bytes): 7.0
+Average Encoded Bytes (7 original bytes): 9.0
+Average Encoded Bytes (8 original bytes): 10.0
+Total Successes: 71434240
+Success Rate: 1.0
+Time in encode: 0:01:27.609
+Time in decode: 0:02:32.070
+***************************
+
+
+Encoding: Guava Base 16
+Average Encoded Bytes (2 original bytes): 4.0
+Average Encoded Bytes (3 original bytes): 6.0
+Average Encoded Bytes (4 original bytes): 8.0
+Average Encoded Bytes (5 original bytes): 10.0
+Average Encoded Bytes (6 original bytes): 12.0
+Average Encoded Bytes (7 original bytes): 14.0
+Average Encoded Bytes (8 original bytes): 16.0
+Total Successes: 71434240
+Success Rate: 1.0
+Time in encode: 0:00:28.147
+Time in decode: 0:00:38.829
+***************************
+
+
+Encoding: Guava Base 32
+Average Encoded Bytes (2 original bytes): 8.0
+Average Encoded Bytes (3 original bytes): 8.0
+Average Encoded Bytes (4 original bytes): 8.0
+Average Encoded Bytes (5 original bytes): 8.0
+Average Encoded Bytes (6 original bytes): 16.0
+Average Encoded Bytes (7 original bytes): 16.0
+Average Encoded Bytes (8 original bytes): 16.0
+Total Successes: 71434240
+Success Rate: 1.0
+Time in encode: 0:00:23.337
+Time in decode: 0:00:55.243
+***************************
+
+
+Encoding: Guava Base 64
+Average Encoded Bytes (2 original bytes): 4.0
+Average Encoded Bytes (3 original bytes): 4.0
+Average Encoded Bytes (4 original bytes): 8.0
+Average Encoded Bytes (5 original bytes): 8.0
+Average Encoded Bytes (6 original bytes): 8.0
+Average Encoded Bytes (7 original bytes): 12.0
+Average Encoded Bytes (8 original bytes): 12.0
+Total Successes: 71434240
+Success Rate: 1.0
+Time in encode: 0:00:21.299
+Time in decode: 0:00:47.335
+***************************
+
+
+Encoding: HBase Base 64
+Average Encoded Bytes (2 original bytes): 4.0
+Average Encoded Bytes (3 original bytes): 4.0
+Average Encoded Bytes (4 original bytes): 8.0
+Average Encoded Bytes (5 original bytes): 8.0
+Average Encoded Bytes (6 original bytes): 8.0
+Average Encoded Bytes (7 original bytes): 12.0
+Average Encoded Bytes (8 original bytes): 12.0
+Total Successes: 71434240
+Success Rate: 1.0
+Time in encode: 0:01:04.723
+Time in decode: 0:05:30.249
+***************************
+
+
+Encoding: Base 91
+Average Encoded Bytes (2 original bytes): 3.0
+Average Encoded Bytes (3 original bytes): 4.0
+Average Encoded Bytes (4 original bytes): 5.0
+Average Encoded Bytes (5 original bytes): 6.7965087890625
+Average Encoded Bytes (6 original bytes): 7.97198486328125
+Average Encoded Bytes (7 original bytes): 8.99853515625
+Average Encoded Bytes (8 original bytes): 10.0
+Total Successes: 71434240
+Success Rate: 1.0
+Time in encode: 0:00:56.579
+Time in decode: 0:00:59.654
+***************************
+
+
+Encoding: Base 128
+Average Encoded Bytes (2 original bytes): 3.0
+Average Encoded Bytes (3 original bytes): 4.0
+Average Encoded Bytes (4 original bytes): 5.0
+Average Encoded Bytes (5 original bytes): 6.0
+Average Encoded Bytes (6 original bytes): 7.0
+Average Encoded Bytes (7 original bytes): 8.0
+Average Encoded Bytes (8 original bytes): 10.0
+Total Successes: 71434240
+Success Rate: 1.0
+Time in encode: 0:00:35.485
+Time in decode: 0:00:36.025
+***************************
```
diff --git a/pom.xml b/pom.xml
index 86a582d..e35a18d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -10,6 +10,28 @@
guava
18.0
+
+ com.googlecode.combinatoricslib
+ combinatoricslib
+ 2.1
+
+
+ org.apache.hbase
+ hbase
+ 0.90.2
+
+
+ org.jruby
+ jruby-complete
+
+
+
+
+ junit
+ junit
+ 4.11
+ test
+
@@ -18,8 +40,8 @@
maven-compiler-plugin
3.1
- 1.6
- 1.6
+ 1.7
+ 1.7
diff --git a/src/main/java/com/bennight/BaseNEncoder.java b/src/main/java/com/bennight/BaseNEncoder.java
index d1ea712..6dda8fd 100644
--- a/src/main/java/com/bennight/BaseNEncoder.java
+++ b/src/main/java/com/bennight/BaseNEncoder.java
@@ -4,44 +4,58 @@
import java.util.HashMap;
import java.util.Map;
-public class BaseNEncoder {
-
-
+import org.apache.commons.lang.ArrayUtils;
+
+import com.fecher.BinaryEncoding;
+
+public class BaseNEncoder implements
+ BinaryEncoding
+{
+
private final char[] _dictionary;
private final Map _reverseDictionary;
private final BigInteger _base;
-
-
- public BaseNEncoder(char[] dictionary){
+
+ public BaseNEncoder(
+ final char[] dictionary ) {
_dictionary = dictionary;
_base = BigInteger.valueOf(dictionary.length);
_reverseDictionary = reverseDictionary(dictionary);
}
-
- public BaseNEncoder(int base) throws Exception{
- if (base > 128){
- throw new Exception("Only handles mapping to ASCII char set, base must be <= 128");
- }
+
+ public BaseNEncoder(
+ final int base ) {
+ // if (base > 128) {
+ // throw new Exception(
+ // "Only handles mapping to ASCII char set, base must be <= 128");
+ // }
_dictionary = new char[base];
- for (int x = 0; x < base; x++){
- _dictionary[x] = (char)x;
+ for (int x = 0; x < base; x++) {
+ _dictionary[x] = (char) x;
}
-
+
_base = BigInteger.valueOf(base);
_reverseDictionary = reverseDictionary(_dictionary);
}
-
- private static Map reverseDictionary(char[] dictionary){
- Map reverse = new HashMap();
- for (int i = 0; i < dictionary.length; i++){
- reverse.put(dictionary[i], i);
+
+ private static Map reverseDictionary(
+ final char[] dictionary ) {
+ final Map reverse = new HashMap();
+ for (int i = 0; i < dictionary.length; i++) {
+ reverse.put(
+ dictionary[i],
+ i);
}
return reverse;
}
-
- public String encode(BigInteger value){
- StringBuilder s = new StringBuilder();
- BigInteger[] parts = { value, BigInteger.ZERO };
+
+ public String encode(
+ final BigInteger value ) {
+ final StringBuilder s = new StringBuilder();
+ BigInteger[] parts = {
+ value,
+ BigInteger.ZERO
+ };
while (parts[0].compareTo(_base) >= 0) {
parts = parts[0].divideAndRemainder(_base);
s.append(_dictionary[parts[1].intValue()]);
@@ -49,15 +63,43 @@ public String encode(BigInteger value){
s.append(_dictionary[parts[0].intValue()]);
return s.reverse().toString();
}
-
- public BigInteger decode(String encoded) {
+
+ public BigInteger decodeInt(
+ final String encoded ) {
BigInteger result = BigInteger.ZERO;
- int len = encoded.length();
+ final int len = encoded.length();
for (int i = 0; i < len; i++) {
- BigInteger digit = BigInteger.valueOf(_reverseDictionary.get(encoded.charAt(i)));
+ final BigInteger digit = BigInteger.valueOf(_reverseDictionary.get(encoded.charAt(i)));
result = result.add(digit.multiply(_base.pow((len - i) - 1)));
}
- return result;
+ return new BigInteger(
+ result.toByteArray());
+ }
+
+    /**
+     * Encodes raw bytes as a string in this encoder's base.
+     *
+     * A single byte with value 1 is placed in front of the input before it is
+     * interpreted as a BigInteger, and the resulting number is passed to
+     * {@link #encode(BigInteger)}.
+     *
+     * @param binary the bytes to encode
+     * @return the encoded text
+     */
+    @Override
+    public String encode(
+            final byte[] binary ) {
+        final byte[] sentinel = new byte[] { 1 };
+        final byte[] padded = ArrayUtils.addAll(sentinel, binary);
+        final BigInteger asNumber = new BigInteger(padded);
+        return encode(asNumber);
}
+    /**
+     * Decodes text produced by {@link #encode(byte[])} back into bytes.
+     *
+     * The text is converted to a BigInteger via {@link #decodeInt(String)};
+     * the first byte of its byte-array form (the marker byte that
+     * {@code encode(byte[])} prepends) is then dropped.
+     *
+     * @param str the encoded text
+     * @return the decoded bytes without the leading marker byte
+     */
+    @Override
+    public byte[] decode(
+            final String str ) {
+        final BigInteger value = decodeInt(str);
+        final byte[] withMarker = value.toByteArray();
+        // Keep everything from index 1 to the end.
+        return ArrayUtils.subarray(withMarker, 1, withMarker.length);
+    }
+
+    /**
+     * @return the label "Bennight Base " followed by the numeric base
+     */
+    @Override
+    public String getEncodingName() {
+        final String prefix = "Bennight Base ";
+        return prefix.concat(String.valueOf(_base));
+    }
}
diff --git a/src/main/java/com/fecher/Base128Encoding.java b/src/main/java/com/fecher/Base128Encoding.java
new file mode 100644
index 0000000..2d7a834
--- /dev/null
+++ b/src/main/java/com/fecher/Base128Encoding.java
@@ -0,0 +1,156 @@
+package com.fecher;
+
+import java.io.ByteArrayOutputStream;
+
+/**
+ * Modified version of Jochaim Henke's original code from
+ * http://base91.sourceforge.net/
+ *
+ * basE91 encoding/decoding routines
+ *
+ * Copyright (c) 2000-2006 Joachim Henke All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer. - Redistributions in binary
+ * form must reproduce the above copyright notice, this list of conditions and
+ * the following disclaimer in the documentation and/or other materials provided
+ * with the distribution. - Neither the name of Joachim Henke nor the names of
+ * his contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @author Joachim Henke (Original version)
+ * @author Benedikt Waldvogel (Modifications)
+ * @author Rich Fecher (More Modifications)
+ */
+public class Base128Encoding implements
+        BinaryEncoding
+{
+    // Packs input bits into 14-bit groups and writes each group as two 7-bit
+    // symbols (low symbol first). Every symbol is a byte value in 0..127.
+
+    /** Symbol table: entry {@code i} holds the byte value {@code i}, for 0..127. */
+    public static final byte[] ENCODING_TABLE;
+    /** Reverse table indexed by UNSIGNED byte value; -1 marks a byte that is not a symbol. */
+    private static final byte[] DECODING_TABLE;
+    private static final int BASE;
+    /** Output/input size ratio, used only to pre-size buffers (floating-point division on purpose). */
+    private static final double AVERAGE_ENCODING_RATIO = 8.0 / 7.0;
+    /** Explicit charset so String conversion never depends on the platform default. */
+    private static final java.nio.charset.Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;
+
+    static {
+        // Build the table directly as bytes; no String/charset round trip.
+        ENCODING_TABLE = new byte[128];
+        for (int i = 0; i < ENCODING_TABLE.length; i++) {
+            ENCODING_TABLE[i] = (byte) i;
+        }
+        BASE = ENCODING_TABLE.length;
+
+        DECODING_TABLE = new byte[256];
+        java.util.Arrays.fill(DECODING_TABLE, (byte) -1);
+        for (int i = 0; i < BASE; ++i) {
+            DECODING_TABLE[ENCODING_TABLE[i] & 0xFF] = (byte) i;
+        }
+    }
+
+    /**
+     * Encodes bytes and returns the symbols as a String.
+     *
+     * @param data the bytes to encode
+     * @return one char per output symbol, each in the range 0..127
+     */
+    @Override
+    public String encode(
+            final byte[] data ) {
+        return new String(
+                encodeBytes(data),
+                UTF_8);
+    }
+
+    @Override
+    public String getEncodingName() {
+        return "Base 128";
+    }
+
+    /**
+     * Encodes bytes into base-128 symbols.
+     *
+     * @param data the bytes to encode
+     * @return the symbol bytes; empty when {@code data} is empty
+     */
+    public static byte[] encodeBytes(
+            final byte[] data ) {
+
+        final int estimatedSize = (int) Math.ceil(data.length * AVERAGE_ENCODING_RATIO);
+        final ByteArrayOutputStream output = new ByteArrayOutputStream(
+                estimatedSize);
+
+        int ebq = 0; // bit queue, filled from the low end
+        int en = 0; // number of valid bits currently in ebq
+        for (int i = 0; i < data.length; ++i) {
+            ebq |= (data[i] & 255) << en;
+            en += 8;
+            if (en > 13) {
+                // Take 14 bits and emit them as two 7-bit symbols.
+                final int ev = ebq & 16383;
+                ebq >>= 14;
+                en -= 14;
+                output.write(ENCODING_TABLE[ev % BASE]);
+                output.write(ENCODING_TABLE[ev / BASE]);
+            }
+        }
+
+        // Flush leftover bits: one symbol for up to 7 bits, two for 8..13.
+        if (en > 0) {
+            output.write(ENCODING_TABLE[ebq % BASE]);
+            if (en > 7) {
+                output.write(ENCODING_TABLE[ebq / BASE]);
+            }
+        }
+
+        return output.toByteArray();
+    }
+
+    /**
+     * Decodes a String produced by {@link #encode(byte[])}.
+     *
+     * The string is converted to UTF-8 bytes first; bytes that are not
+     * symbols (values 0x80..0xFF) are skipped by {@link #decodeBytes(byte[])}.
+     *
+     * @param str the encoded text
+     * @return the decoded bytes
+     */
+    @Override
+    public byte[] decode(
+            final String str ) {
+        return decodeBytes(str.getBytes(UTF_8));
+    }
+
+    /**
+     * Decodes base-128 symbols back into bytes.
+     *
+     * @param data the symbol bytes; values outside the table are ignored
+     * @return the decoded bytes; empty when {@code data} holds no symbols
+     */
+    public static byte[] decodeBytes(
+            final byte[] data ) {
+
+        int dbq = 0; // bit queue
+        int dn = 0; // number of valid bits in dbq
+        int dv = -1; // pending low symbol, or -1 when none is pending
+
+        final int estimatedSize = (int) Math.round(data.length / AVERAGE_ENCODING_RATIO);
+        final ByteArrayOutputStream output = new ByteArrayOutputStream(
+                estimatedSize);
+
+        for (int i = 0; i < data.length; ++i) {
+            // Mask to an unsigned index: a raw negative byte would throw
+            // ArrayIndexOutOfBoundsException.
+            final int symbol = DECODING_TABLE[data[i] & 0xFF];
+            if (symbol == -1) {
+                continue;
+            }
+            if (dv == -1) {
+                dv = symbol;
+            }
+            else {
+                dv += symbol * BASE;
+                dbq |= dv << dn;
+                dn += 14;
+                do {
+                    output.write((byte) dbq);
+                    dbq >>= 8;
+                    dn -= 8;
+                }
+                while (dn > 7);
+                dv = -1;
+            }
+        }
+
+        // A trailing unpaired symbol carries the last partial byte.
+        if (dv != -1) {
+            output.write((byte) (dbq | (dv << dn)));
+        }
+
+        return output.toByteArray();
+    }
+}
diff --git a/src/main/java/com/fecher/Base91.java b/src/main/java/com/fecher/Base91.java
new file mode 100644
index 0000000..1de7edf
--- /dev/null
+++ b/src/main/java/com/fecher/Base91.java
@@ -0,0 +1,161 @@
+package com.fecher;
+
+import java.io.ByteArrayOutputStream;
+
+/**
+ * Modified version of Jochaim Henke's original code from
+ * http://base91.sourceforge.net/
+ *
+ * basE91 encoding/decoding routines
+ *
+ * Copyright (c) 2000-2006 Joachim Henke All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer. - Redistributions in binary
+ * form must reproduce the above copyright notice, this list of conditions and
+ * the following disclaimer in the documentation and/or other materials provided
+ * with the distribution. - Neither the name of Joachim Henke nor the names of
+ * his contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @author Joachim Henke (Original version)
+ * @author Benedikt Waldvogel (Modifications)
+ */
+public class Base91 implements
+        BinaryEncoding
+{
+    // Packs input bits into 13- or 14-bit groups and writes each group as two
+    // symbols from a 91-character printable alphabet (low symbol first).
+
+    /** The 91-symbol alphabet, in value order. */
+    public static final byte[] ENCODING_TABLE;
+    /** Reverse table indexed by UNSIGNED byte value; -1 marks a byte that is not a symbol. */
+    private static final byte[] DECODING_TABLE;
+    private static final int BASE;
+    /** Approximate output/input size ratio, used only to pre-size buffers. */
+    private static final float AVERAGE_ENCODING_RATIO = 1.2297f;
+    /** Explicit charset so String conversion never depends on the platform default. */
+    private static final java.nio.charset.Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;
+
+    static {
+        final String ts = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!#$%&()*+,./:;<=>?@[]^_`{|}~\"";
+        ENCODING_TABLE = ts.getBytes(UTF_8);
+        BASE = ENCODING_TABLE.length;
+
+        DECODING_TABLE = new byte[256];
+        java.util.Arrays.fill(DECODING_TABLE, (byte) -1);
+        for (int i = 0; i < BASE; ++i) {
+            DECODING_TABLE[ENCODING_TABLE[i] & 0xFF] = (byte) i;
+        }
+    }
+
+    /**
+     * Encodes bytes and returns the symbols as a String.
+     *
+     * @param data the bytes to encode
+     * @return the encoded text, made only of alphabet characters
+     */
+    @Override
+    public String encode(
+            final byte[] data ) {
+        return new String(
+                encodeBytes(data),
+                UTF_8);
+    }
+
+    @Override
+    public String getEncodingName() {
+        return "Base 91";
+    }
+
+    /**
+     * Decodes a String produced by {@link #encode(byte[])}.
+     *
+     * The string is converted to UTF-8 bytes first; bytes that are not in
+     * the alphabet are skipped by {@link #decodeBytes(byte[])}.
+     *
+     * @param str the encoded text
+     * @return the decoded bytes
+     */
+    @Override
+    public byte[] decode(
+            final String str ) {
+        return decodeBytes(str.getBytes(UTF_8));
+    }
+
+    /**
+     * Encodes bytes into base-91 symbols.
+     *
+     * @param data the bytes to encode
+     * @return the symbol bytes; empty when {@code data} is empty
+     */
+    public static byte[] encodeBytes(
+            final byte[] data ) {
+
+        final int estimatedSize = (int) Math.ceil(data.length * AVERAGE_ENCODING_RATIO);
+        final ByteArrayOutputStream output = new ByteArrayOutputStream(
+                estimatedSize);
+
+        int ebq = 0; // bit queue, filled from the low end
+        int en = 0; // number of valid bits currently in ebq
+        for (int i = 0; i < data.length; ++i) {
+            ebq |= (data[i] & 255) << en;
+            en += 8;
+            if (en > 13) {
+                int ev = ebq & 8191;
+
+                if (ev > 88) {
+                    // The 13-bit value is consumed as-is.
+                    ebq >>= 13;
+                    en -= 13;
+                }
+                else {
+                    // Small 13-bit values take a 14th bit as well.
+                    ev = ebq & 16383;
+                    ebq >>= 14;
+                    en -= 14;
+                }
+                output.write(ENCODING_TABLE[ev % BASE]);
+                output.write(ENCODING_TABLE[ev / BASE]);
+            }
+        }
+
+        // Flush leftover bits as one or two symbols.
+        if (en > 0) {
+            output.write(ENCODING_TABLE[ebq % BASE]);
+            if ((en > 7) || (ebq > 90)) {
+                output.write(ENCODING_TABLE[ebq / BASE]);
+            }
+        }
+
+        return output.toByteArray();
+    }
+
+    /**
+     * Decodes base-91 symbols back into bytes.
+     *
+     * @param data the symbol bytes; values outside the alphabet are ignored
+     * @return the decoded bytes; empty when {@code data} holds no symbols
+     */
+    public static byte[] decodeBytes(
+            final byte[] data ) {
+
+        int dbq = 0; // bit queue
+        int dn = 0; // number of valid bits in dbq
+        int dv = -1; // pending low symbol, or -1 when none is pending
+
+        final int estimatedSize = Math.round(data.length / AVERAGE_ENCODING_RATIO);
+        final ByteArrayOutputStream output = new ByteArrayOutputStream(
+                estimatedSize);
+
+        for (int i = 0; i < data.length; ++i) {
+            // Mask to an unsigned index: a raw negative byte would throw
+            // ArrayIndexOutOfBoundsException.
+            final int symbol = DECODING_TABLE[data[i] & 0xFF];
+            if (symbol == -1) {
+                continue;
+            }
+            if (dv == -1) {
+                dv = symbol;
+            }
+            else {
+                dv += symbol * BASE;
+                dbq |= dv << dn;
+                // Mirrors the encoder's choice between 13 and 14 bits.
+                dn += (dv & 8191) > 88 ? 13 : 14;
+                do {
+                    output.write((byte) dbq);
+                    dbq >>= 8;
+                    dn -= 8;
+                }
+                while (dn > 7);
+                dv = -1;
+            }
+        }
+
+        // A trailing unpaired symbol carries the last partial byte.
+        if (dv != -1) {
+            output.write((byte) (dbq | (dv << dn)));
+        }
+
+        return output.toByteArray();
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/fecher/BinaryEncoding.java b/src/main/java/com/fecher/BinaryEncoding.java
new file mode 100644
index 0000000..c616326
--- /dev/null
+++ b/src/main/java/com/fecher/BinaryEncoding.java
@@ -0,0 +1,12 @@
+package com.fecher;
+
+/**
+ * Common contract for the binary-to-text encodings in this project.
+ */
+public interface BinaryEncoding
+{
+    /**
+     * @param binary the bytes to encode
+     * @return the textual form of {@code binary}
+     */
+    String encode(byte[] binary);
+
+    /**
+     * @param str text to decode (presumably the output of {@link #encode(byte[])})
+     * @return the decoded bytes
+     */
+    byte[] decode(String str);
+
+    /**
+     * @return a display name for this encoding
+     */
+    String getEncodingName();
+}
diff --git a/src/main/java/com/fecher/GuavaBaseNEncoding.java b/src/main/java/com/fecher/GuavaBaseNEncoding.java
new file mode 100644
index 0000000..e525717
--- /dev/null
+++ b/src/main/java/com/fecher/GuavaBaseNEncoding.java
@@ -0,0 +1,44 @@
+package com.fecher;
+
+import com.google.common.io.BaseEncoding;
+
+/**
+ * {@link BinaryEncoding} adapter over Guava's {@link BaseEncoding} for
+ * base 16, 32 and 64.
+ */
+public class GuavaBaseNEncoding implements
+        BinaryEncoding
+{
+    private final BaseEncoding guavaEncoding;
+    private final int n;
+
+    /**
+     * @param n the base to use; must be 16, 32 or 64
+     * @throws IllegalArgumentException if {@code n} is any other value
+     */
+    public GuavaBaseNEncoding(
+            final int n ) {
+        switch (n) {
+            case 16:
+                guavaEncoding = BaseEncoding.base16();
+                break;
+            case 32:
+                guavaEncoding = BaseEncoding.base32();
+                break;
+            case 64:
+                guavaEncoding = BaseEncoding.base64();
+                break;
+            default:
+                // Fail here rather than with a NullPointerException on first use.
+                throw new IllegalArgumentException(
+                        "Unsupported base " + n + "; expected 16, 32 or 64");
+        }
+        this.n = n;
+    }
+
+    /**
+     * @param binary the bytes to encode
+     * @return the text produced by the selected Guava encoding
+     */
+    @Override
+    public String encode(
+            final byte[] binary ) {
+        return guavaEncoding.encode(binary);
+    }
+
+    /**
+     * @param str the text to decode
+     * @return the bytes produced by the selected Guava encoding
+     */
+    @Override
+    public byte[] decode(
+            final String str ) {
+        return guavaEncoding.decode(str);
+    }
+
+    /**
+     * @return "Guava Base " followed by the base passed to the constructor
+     */
+    @Override
+    public String getEncodingName() {
+        return "Guava Base " + n;
+    }
+
+}
diff --git a/src/main/java/com/fecher/HBase64Encoding.java b/src/main/java/com/fecher/HBase64Encoding.java
new file mode 100644
index 0000000..d58d77b
--- /dev/null
+++ b/src/main/java/com/fecher/HBase64Encoding.java
@@ -0,0 +1,32 @@
+package com.fecher;
+
+import org.apache.hadoop.hbase.util.Base64;
+
+/**
+ * {@link BinaryEncoding} adapter over the HBase {@link Base64} utility,
+ * always called with the {@code Base64.ORDERED} option.
+ */
+public class HBase64Encoding implements
+        BinaryEncoding
+{
+
+    public HBase64Encoding() {}
+
+    /**
+     * @param binary the bytes to encode
+     * @return the result of {@code Base64.encodeBytes} with {@code Base64.ORDERED}
+     */
+    @Override
+    public String encode(
+            final byte[] binary ) {
+        final int options = Base64.ORDERED;
+        return Base64.encodeBytes(binary, options);
+    }
+
+    /**
+     * @param str the text to decode
+     * @return the result of {@code Base64.decode} with {@code Base64.ORDERED}
+     */
+    @Override
+    public byte[] decode(
+            final String str ) {
+        final int options = Base64.ORDERED;
+        return Base64.decode(str, options);
+    }
+
+    /**
+     * @return the fixed label "HBase Base 64"
+     */
+    @Override
+    public String getEncodingName() {
+        return "HBase Base 64";
+    }
+
+}
diff --git a/src/test/java/com/fecher/EncodingTest.java b/src/test/java/com/fecher/EncodingTest.java
new file mode 100644
index 0000000..8091044
--- /dev/null
+++ b/src/test/java/com/fecher/EncodingTest.java
@@ -0,0 +1,194 @@
+package com.fecher;
+
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.lang.time.StopWatch;
+import org.junit.Test;
+import org.paukov.combinatorics.Factory;
+import org.paukov.combinatorics.ICombinatoricsVector;
+
+import com.bennight.BaseNEncoder;
+
+/**
+ * Round-trips generated byte sequences through each {@link BinaryEncoding}
+ * and prints, per encoding, the average UTF-8 size of the encoded text, the
+ * round-trip success rate and the time spent in encode/decode.
+ */
+public class EncodingTest
+{
+    private static final Charset UTF_8 = Charset.forName("UTF-8");
+
+    /** One statistics collector per encoding under test. */
+    private static final Stats[] TEST_ENCODING = new Stats[] {
+        new Stats(new BaseNEncoder(2)),
+        new Stats(new BaseNEncoder(4)),
+        new Stats(new BaseNEncoder(8)),
+        new Stats(new BaseNEncoder(16)),
+        new Stats(new BaseNEncoder(32)),
+        new Stats(new BaseNEncoder(64)),
+        new Stats(new BaseNEncoder(85)),
+        new Stats(new BaseNEncoder(128)),
+        new Stats(new GuavaBaseNEncoding(16)),
+        new Stats(new GuavaBaseNEncoding(32)),
+        new Stats(new GuavaBaseNEncoding(64)),
+        new Stats(new HBase64Encoding()),
+        new Stats(new Base91()),
+        new Stats(new Base128Encoding()),
+    };
+
+    /**
+     * Running totals for one encoding. Both maps are keyed by the number of
+     * original (unencoded) bytes.
+     */
+    private static class Stats
+    {
+        /** Sum of UTF-8 byte lengths of all encoded strings, per input length. */
+        private final Map<Integer, Long> totalBytes = new HashMap<Integer, Long>();
+        /** Number of inputs processed, per input length. */
+        private final Map<Integer, Long> totalEncodings = new HashMap<Integer, Long>();
+        private final BinaryEncoding encoding;
+        /** Number of inputs whose decode output equalled the input. */
+        private long totalSuccesses = 0;
+        private final StopWatch encodeTimer = new StopWatch();
+        private final StopWatch decodeTimer = new StopWatch();
+
+        public Stats(
+                final BinaryEncoding encoding ) {
+            this.encoding = encoding;
+            // Start and immediately suspend each timer; the test loop then
+            // brackets every call with resume()/suspend().
+            encodeTimer.start();
+            encodeTimer.suspend();
+
+            decodeTimer.start();
+            decodeTimer.suspend();
+        }
+
+        /**
+         * @return a multi-line report: encoding name, average encoded bytes
+         *         per input length, success count and rate, and both timers
+         */
+        @Override
+        public String toString() {
+            final StringBuilder str = new StringBuilder();
+            str.append("Encoding: ").append(encoding.getEncodingName()).append('\n');
+            long total = 0;
+            for (final Entry<Integer, Long> e : totalEncodings.entrySet()) {
+                final long bytes = totalBytes.get(e.getKey());
+                final long count = e.getValue();
+                str.append("Average Encoded Bytes (").append(e.getKey())
+                        .append(" original bytes): ")
+                        .append((double) bytes / (double) count)
+                        .append('\n');
+                total += count;
+            }
+            str.append("Total Successes: ").append(totalSuccesses).append('\n')
+                    .append("Success Rate: ").append((double) totalSuccesses / (double) total).append('\n')
+                    .append("Time in encode: ").append(encodeTimer.toString()).append('\n')
+                    .append("Time in decode: ").append(decodeTimer.toString()).append('\n')
+                    .append("***************************\n\n");
+            return str.toString();
+        }
+    }
+
+    public static void main(
+            final String[] args ) {
+        new EncodingTest().test();
+    }
+
+    /**
+     * For each length k in 2..8, builds a set of byte values spaced
+     * 2^(k-1) apart across the byte range, enumerates every length-k
+     * sequence over that set, and runs each sequence through every encoding:
+     * encode, convert to UTF-8 bytes and back, decode, compare.
+     * The per-encoding reports are printed at the end.
+     */
+    @Test
+    public void test() {
+
+        for (int k = 2; k <= 8; k++) {
+            for (final Stats stats : TEST_ENCODING) {
+                stats.totalBytes.put(k, 0L);
+                stats.totalEncodings.put(k, 0L);
+            }
+            // Step between sampled byte values; larger k samples fewer values.
+            final int divisor = 1 << (k - 1);
+            final Byte[] bytesArray = new Byte[((Byte.MAX_VALUE - Byte.MIN_VALUE) + 1) / divisor];
+            int i = 0;
+            for (int b = Byte.MIN_VALUE; b <= Byte.MAX_VALUE; b += divisor) {
+                bytesArray[i++] = (byte) b;
+            }
+
+            final ICombinatoricsVector<Byte> bytesVector = Factory.createVector(bytesArray);
+            final Iterator<ICombinatoricsVector<Byte>> it = Factory
+                    .createPermutationWithRepetitionGenerator(bytesVector, k)
+                    .iterator();
+
+            while (it.hasNext()) {
+                final ICombinatoricsVector<Byte> bytes = it.next();
+                final byte[] bytesPrimitive = ArrayUtils.toPrimitive(
+                        bytes.getVector().toArray(new Byte[] {}));
+                for (final Stats stats : TEST_ENCODING) {
+                    stats.encodeTimer.resume();
+                    final String str = stats.encoding.encode(bytesPrimitive);
+                    stats.encodeTimer.suspend();
+
+                    // Force the text through UTF-8 bytes and back, outside
+                    // both timers; reuse the constant instead of looking the
+                    // charset up on every iteration.
+                    final byte[] utf8Encoding = str.getBytes(UTF_8);
+                    final String utfChars = new String(utf8Encoding, UTF_8);
+
+                    stats.decodeTimer.resume();
+                    final byte[] original = stats.encoding.decode(utfChars);
+                    stats.decodeTimer.suspend();
+
+                    if (Arrays.equals(bytesPrimitive, original)) {
+                        stats.totalSuccesses++;
+                    }
+                    stats.totalBytes.put(k, stats.totalBytes.get(k) + utf8Encoding.length);
+                    stats.totalEncodings.put(k, stats.totalEncodings.get(k) + 1);
+                }
+            }
+        }
+        for (final Stats stats : TEST_ENCODING) {
+            System.out.println(stats.toString());
+        }
+    }
+}