Skip to content

Commit efa66c4

Browse files
committed
Rewrite entropy util to use more memory friendly version
1 parent c4e0e1b commit efa66c4

File tree

3 files changed

+43
-54
lines changed

3 files changed

+43
-54
lines changed

src/main/java/at/favre/lib/bytes/Bytes.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1402,13 +1402,13 @@ public int count(byte[] pattern) {
14021402
* variables. Specifically, assuming for simplicity that each of the microscopic configurations is equally probable,
14031403
* the entropy of the system is the natural logarithm of that number of configurations, multiplied by the Boltzmann constant kB.
14041404
* <p>
1405-
* This implementation requires O(n) time and space complexity.
1405+
* This implementation requires O(n) time and O(1) space complexity.
14061406
*
14071407
* @return entropy value; higher is more entropy (simply: more different values)
14081408
* @see <a href="https://en.wikipedia.org/wiki/Entropy">Entropy</a>
14091409
*/
14101410
public double entropy() {
1411-
return new Util.Entropy<>(toList()).entropy();
1411+
return Util.Byte.entropy(internalArray());
14121412
}
14131413

14141414
/* CONVERTERS POSSIBLY REUSING THE INTERNAL ARRAY ***************************************************************/

src/main/java/at/favre/lib/bytes/Util.java

Lines changed: 30 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,36 @@ static boolean constantTimeEquals(byte[] obj, byte[] anotherArray) {
268268
}
269269
return result == 0;
270270
}
271+
272+
/**
273+
* Calculates the entropy factor of a byte array.
274+
* <p>
275+
* This implementation will not create a copy of the internal array and will only internally initialize
276+
* a int array with 256 elements as temporary buffer.
277+
*
278+
* @param array to calculate the entropy from
279+
* @return entropy factor, higher means higher entropy
280+
*/
281+
static double entropy(byte[] array) {
282+
final int[] buffer = new int[256];
283+
Arrays.fill(buffer, -1);
284+
285+
for (byte element : array) {
286+
int unsigned = 0xff & element;
287+
if (buffer[unsigned] == -1) {
288+
buffer[unsigned] = 0;
289+
}
290+
buffer[unsigned]++;
291+
}
292+
293+
double entropy = 0;
294+
for (int count : buffer) {
295+
if (count == -1) continue;
296+
double prob = (double) count / array.length;
297+
entropy -= prob * (Math.log(prob) / Math.log(2));
298+
}
299+
return entropy;
300+
}
271301
}
272302

273303
/**
@@ -683,58 +713,6 @@ static byte[] readFromFile(java.io.File file, int offset, int length) {
683713
private Util() {
684714
}
685715

686-
/*
687-
=================================================================================================
688-
Copyright 2011 Twitter, Inc.
689-
-------------------------------------------------------------------------------------------------
690-
Licensed under the Apache License, Version 2.0 (the "License");
691-
you may not use this work except in compliance with the License.
692-
You may obtain a copy of the License in the LICENSE file, or at:
693-
694-
http://www.apache.org/licenses/LICENSE-2.0
695-
696-
Unless required by applicable law or agreed to in writing, software
697-
distributed under the License is distributed on an "AS IS" BASIS,
698-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
699-
See the License for the specific language governing permissions and
700-
limitations under the License.
701-
=================================================================================================
702-
*/
703-
704-
/**
705-
* Class that calculates the entropy factor
706-
*
707-
* @param <T> type of the elements whose occurrences are counted
708-
*/
709-
@SuppressWarnings("WeakerAccess")
710-
static final class Entropy<T> {
711-
private final Map<T, Integer> map = new HashMap<>();
712-
private int total = 0;
713-
714-
public Entropy(Iterable<T> elements) {
715-
for (T element : elements) {
716-
if (!map.containsKey(element)) {
717-
map.put(element, 0);
718-
}
719-
map.put(element, map.get(element) + 1);
720-
total++;
721-
}
722-
}
723-
724-
private double Log2(double n) {
725-
return Math.log(n) / Math.log(2);
726-
}
727-
728-
public double entropy() {
729-
double entropy = 0;
730-
for (int count : map.values()) {
731-
double prob = (double) count / total;
732-
entropy -= prob * Log2(prob);
733-
}
734-
return entropy;
735-
}
736-
}
737-
738716
/**
739717
* A simple iterator for the bytes class, which does not support remove
740718
*/

src/test/java/at/favre/lib/bytes/UtilByteTest.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,4 +234,15 @@ public void testRightShiftAgainstRefImpl() {
234234
}
235235
}
236236
}
237+
238+
@Test
239+
public void entropy() {
240+
assertEquals(0, Util.Byte.entropy(new byte[0]), 0.1d);
241+
assertEquals(0, Util.Byte.entropy(new byte[1]), 0.1d);
242+
assertEquals(0, Util.Byte.entropy(new byte[256]), 0.1d);
243+
assertEquals(0, Util.Byte.entropy(new byte[]{1}), 0.1d);
244+
assertTrue(Util.Byte.entropy(new byte[]{(byte) 0x8E, (byte) 0xD1, (byte) 0xFD, (byte) 0xAA, 0x12, (byte) 0xAF, (byte) 0x78, 0x09, 0x1E, (byte) 0xD1, (byte) 0xFD, (byte) 0xAA, 0x12, (byte) 0xAF, (byte) 0x00, 0x0A, (byte) 0xEE, (byte) 0xD1, (byte) 0xFD, (byte) 0xAA, 0x12, (byte) 0xAF, (byte) 0x78, 0x11}) > 3.5);
245+
assertTrue(Util.Byte.entropy(new byte[]{0x4A, (byte) 0x94, (byte) 0xFD, (byte) 0xFF, 0x1E, (byte) 0xAF, (byte) 0xED}) > 2.5);
246+
assertTrue(Util.Byte.entropy(new byte[]{0x1A, 0x6F}) > 0.5);
247+
}
237248
}

0 commit comments

Comments
 (0)