diff --git a/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/AbstractFilter.java b/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/AbstractFilter.java
index 61dd388083..9b8f9c3e2c 100644
--- a/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/AbstractFilter.java
+++ b/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/AbstractFilter.java
@@ -30,7 +30,7 @@ public abstract class AbstractFilter implements BloomFilter {
 	@Serial
 	private static final long serialVersionUID = 1L;
 
-	private final BitSet bitSet;
+	protected final BitSet bitSet;
 	/**
 	 * 容量
 	 */
diff --git a/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/FuncFilter.java b/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/FuncFilter.java
index 8881d1806d..f764468387 100644
--- a/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/FuncFilter.java
+++ b/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/FuncFilter.java
@@ -16,7 +16,10 @@
 
 package cn.hutool.v7.core.text.bloom;
 
+import cn.hutool.v7.core.lang.Assert;
+
 import java.io.Serial;
+import java.util.List;
 import java.util.function.Function;
 
 /**
@@ -33,26 +36,72 @@ public class FuncFilter extends AbstractFilter {
 	 * 创建FuncFilter
 	 *
 	 * @param size 最大值
-	 * @param hashFunc Hash函数
+	 * @param hashFuncs hash functions; at least one is required
 	 * @return FuncFilter
 	 */
-	public static FuncFilter of(final int size, final Function<String, Number> hashFunc) {
-		return new FuncFilter(size, hashFunc);
+	@SafeVarargs
+	public static FuncFilter of(final int size, final Function<String, Number>... hashFuncs) {
+		return new FuncFilter(size, hashFuncs);
 	}
 
-	private final Function<String, Number> hashFunc;
+	// Immutable snapshot of the hash functions; each one addresses one bit per string
+	private final List<Function<String, Number>> hashFuncs;
 
 	/**
 	 * @param size 最大值
-	 * @param hashFunc Hash函数
+	 * @param hashFuncs hash functions; must contain at least one, and no element may be null
 	 */
-	public FuncFilter(final int size, final Function<String, Number> hashFunc) {
+	@SafeVarargs
+	public FuncFilter(final int size, final Function<String, Number>... hashFuncs) {
 		super(size);
-		this.hashFunc = hashFunc;
+		Assert.notEmpty(hashFuncs, "Hash functions must not be empty");
+		// Copy rather than wrap: the caller still holds the varargs array, and a view would let later writes to it change this filter
+		this.hashFuncs = List.of(hashFuncs);
 	}
 
+	/**
+	 * Kept for compatibility with the parent class: when several hash functions are configured, only the first one is used.
+	 *
+	 * @param str string to hash
+	 * @return index computed by the first hash function
+	 */
 	@Override
 	public int hash(final String str) {
-		return hashFunc.apply(str).intValue() % size;
+		return hash(str, hashFuncs.get(0));
+	}
+
+	/**
+	 * Computes the bit index of a string under the given hash function.
+	 *
+	 * @param str string to hash
+	 * @param hashFunc hash function to apply
+	 * @return result of the hash function with the sign bit cleared, reduced modulo {@code size}
+	 */
+	public int hash(final String str, final Function<String, Number> hashFunc) {
+		// Clear the sign bit so the index passed to the BitSet is never negative
+		return (hashFunc.apply(str).intValue() & 0x7FFFFFFF) % size;
+	}
+
+	@Override
+	public boolean contains(final String str) {
+		for (final Function<String, Number> hashFunc : hashFuncs) {
+			if (!bitSet.get(hash(str, hashFunc))) {
+				return false;
+			}
+		}
+		return true;
+	}
+
+	@Override
+	public boolean add(final String str) {
+		boolean changed = false;
+		for (final Function<String, Number> hashFunc : hashFuncs) {
+			final int hash = hash(str, hashFunc);
+			if (!bitSet.get(hash)) {
+				bitSet.set(hash);
+				changed = true;
+			}
+		}
+		return changed;
 	}
 }
diff --git a/hutool-core/src/test/java/cn/hutool/v7/core/text/bloom/BitMapBloomFilterTest.java b/hutool-core/src/test/java/cn/hutool/v7/core/text/bloom/BitMapBloomFilterTest.java
index 6706e34d78..a6851654f3 100644
--- a/hutool-core/src/test/java/cn/hutool/v7/core/text/bloom/BitMapBloomFilterTest.java
+++ b/hutool-core/src/test/java/cn/hutool/v7/core/text/bloom/BitMapBloomFilterTest.java
@@ -22,11 +22,12 @@
 
 public class BitMapBloomFilterTest {
 
+	private static final int SIZE = 2 * 1024 * 1024 * 8;
+
 	@Test
 	public void filterTest() {
-		final int size = 2 * 1024 * 1024 * 8;
 
-		final CombinedBloomFilter filter = new CombinedBloomFilter(FuncFilter.of(size, HashUtil::rsHash));
+		final CombinedBloomFilter filter = new CombinedBloomFilter(FuncFilter.of(SIZE, HashUtil::rsHash));
 		filter.add("123");
 		filter.add("abc");
 		filter.add("ddd");
@@ -35,4 +36,69 @@ public void filterTest() {
 		Assertions.assertTrue(filter.contains("ddd"));
 		Assertions.assertTrue(filter.contains("123"));
 	}
+
+	@Test
+	public void multiHashFuncTest() {
+		final FuncFilter filter = FuncFilter.of(SIZE,
+			HashUtil::rsHash,
+			HashUtil::jsHash,
+			HashUtil::pjwHash,
+			HashUtil::elfHash,
+			HashUtil::bkdrHash,
+			HashUtil::sdbmHash,
+			HashUtil::djbHash,
+			HashUtil::dekHash,
+			HashUtil::apHash,
+			HashUtil::javaDefaultHash
+		);
+
+		filter.add("Hutool");
+		filter.add("BloomFilter");
+		filter.add("Java");
+
+		Assertions.assertTrue(filter.contains("Hutool"));
+		Assertions.assertTrue(filter.contains("BloomFilter"));
+		Assertions.assertTrue(filter.contains("Java"));
+		Assertions.assertFalse(filter.contains("Python"));
+		Assertions.assertFalse(filter.contains("Go"));
+		Assertions.assertFalse(filter.contains("hutool"));
+	}
+
+	@Test
+	public void combinedMultiHashTest() {
+		final FuncFilter multiHashFuncFilter = FuncFilter.of(SIZE,
+			HashUtil::bkdrHash,
+			HashUtil::apHash,
+			HashUtil::djbHash
+		);
+		final CombinedBloomFilter filter = new CombinedBloomFilter(multiHashFuncFilter);
+		filter.add("123123WASD-WASD");
+		Assertions.assertTrue(filter.contains("123123WASD-WASD"));
+		Assertions.assertFalse(filter.contains("123123WASD-WASD-false"));
+	}
+
+	@Test
+	public void chineseStringWithThreeHashesTest() {
+		final FuncFilter filter = FuncFilter.of(SIZE,
+			HashUtil::bkdrHash,
+			HashUtil::apHash,
+			HashUtil::djbHash
+		);
+
+		final String s1 = "你好世界";
+		final String s2 = "双亲委派";
+		final String s3 = "测试工程师";
+
+		filter.add(s1);
+		filter.add(s2);
+		filter.add(s3);
+		Assertions.assertTrue(filter.contains(s1), "应包含: " + s1);
+		Assertions.assertTrue(filter.contains(s2), "应包含: " + s2);
+		Assertions.assertTrue(filter.contains(s3), "应包含: " + s3);
+		Assertions.assertFalse(filter.contains("我好世界"), "多字");
+		Assertions.assertFalse(filter.contains("父亲委派"), "改字");
+		Assertions.assertFalse(filter.contains("测试"), "子串");
+		Assertions.assertFalse(filter.contains(""), "空串");
+		Assertions.assertFalse(filter.contains("👍"), "未添加的");
+	}
 }