Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public abstract class AbstractFilter implements BloomFilter {
@Serial
private static final long serialVersionUID = 1L;

private final BitSet bitSet;
protected final BitSet bitSet;
/**
* 容量
*/
Expand All @@ -50,12 +50,12 @@ public AbstractFilter(final int size) {

@Override
public boolean contains(final String str) {
return bitSet.get(Math.abs(hash(str)));
return bitSet.get(hash(str));
}

@Override
public boolean add(final String str) {
final int hash = Math.abs(hash(str));
final int hash = hash(str);
if (bitSet.get(hash)) {
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,12 @@

package cn.hutool.v7.core.text.bloom;

import cn.hutool.v7.core.lang.Assert;

import java.io.Serial;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.function.Function;

/**
Expand All @@ -33,26 +38,69 @@ public class FuncFilter extends AbstractFilter {
* 创建FuncFilter
*
* @param size 最大值
* @param hashFunc Hash函数
* @param hashFuncs Hash函数
* @return FuncFilter
*/
public static FuncFilter of(final int size, final Function<String, Number> hashFunc) {
return new FuncFilter(size, hashFunc);
@SafeVarargs
public static FuncFilter of(final int size, final Function<String, Number>... hashFuncs) {
return new FuncFilter(size, hashFuncs);
}

private final Function<String, Number> hashFunc;
// 允许接收多个哈希函数
private final List<Function<String, Number>> hashFuncs;

/**
* @param size 最大值
* @param hashFunc Hash函数
* @param hashFuncs Hash函数
*/
public FuncFilter(final int size, final Function<String, Number> hashFunc) {
@SafeVarargs
public FuncFilter(final int size, final Function<String, Number>... hashFuncs) {
super(size);
this.hashFunc = hashFunc;
Assert.notEmpty(hashFuncs, "Hash functions must not be empty");
this.hashFuncs = Collections.unmodifiableList(Arrays.asList(hashFuncs));
}

/**
* Kept for compatibility with the parent class: when several hash functions are configured, only the first one is used.
*
* @param str 字符串
*/
@Override
public int hash(final String str) {
return hashFunc.apply(str).intValue() % size;
return hash(str, hashFuncs.get(0));
}

/**
 * Computes a bit index for the given string using one specific hash function.
 *
 * @param str      the string to hash
 * @param hashFunc the hash function to apply to {@code str}
 * @return the function's {@code int} value with the sign bit cleared, reduced modulo {@code size}
 */
public int hash(final String str, final Function<String, Number> hashFunc) {
    final int raw = hashFunc.apply(str).intValue();
    // Clearing the sign bit makes the value non-negative, so the remainder below is never negative.
    final int nonNegative = raw & Integer.MAX_VALUE;
    return nonNegative % size;
}

/**
 * Checks whether the bit selected by every configured hash function is set for the given string.
 *
 * @param str the string to look up
 * @return {@code true} only if all corresponding bits are set; {@code false} as soon as one is clear
 */
@Override
public boolean contains(final String str) {
    // allMatch short-circuits on the first clear bit, like an early return from a loop.
    return hashFuncs.stream().allMatch(fn -> bitSet.get(hash(str, fn)));
}

/**
 * Sets the bit selected by each configured hash function for the given string.
 *
 * @param str the string to add
 * @return {@code true} if at least one bit was newly set; {@code false} if every bit was already set
 */
@Override
public boolean add(final String str) {
    boolean changed = false;
    for (final Function<String, Number> fn : hashFuncs) {
        final int index = hash(str, fn);
        if (bitSet.get(index)) {
            // Bit already set by this or an earlier entry; nothing to do for this function.
            continue;
        }
        bitSet.set(index);
        changed = true;
    }
    return changed;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,12 @@

public class BitMapBloomFilterTest {

private static final int SIZE = 2 * 1024 * 1024 * 8;

@Test
public void filterTest() {
final int size = 2 * 1024 * 1024 * 8;

final CombinedBloomFilter filter = new CombinedBloomFilter(FuncFilter.of(size, HashUtil::rsHash));
final CombinedBloomFilter filter = new CombinedBloomFilter(FuncFilter.of(SIZE, HashUtil::rsHash));
filter.add("123");
filter.add("abc");
filter.add("ddd");
Expand All @@ -35,4 +36,68 @@ public void filterTest() {
Assertions.assertTrue(filter.contains("ddd"));
Assertions.assertTrue(filter.contains("123"));
}
/**
 * Builds a filter from ten hash functions, adds three strings and checks membership
 * for both the added strings and three strings that were never added.
 */
@Test
public void multiHashFuncTest() {
    final FuncFilter filter = FuncFilter.of(SIZE,
        HashUtil::rsHash,
        HashUtil::jsHash,
        HashUtil::pjwHash,
        HashUtil::elfHash,
        HashUtil::bkdrHash,
        HashUtil::sdbmHash,
        HashUtil::djbHash,
        HashUtil::dekHash,
        HashUtil::apHash,
        HashUtil::javaDefaultHash
    );

    final String[] inserted = {"Hutool", "BloomFilter", "Java"};
    for (final String item : inserted) {
        filter.add(item);
    }

    for (final String item : inserted) {
        Assertions.assertTrue(filter.contains(item));
    }

    // Strings that were never added, including a lower-case variant of an added one.
    final String[] missing = {"Python", "Go", "hutool"};
    for (final String item : missing) {
        Assertions.assertFalse(filter.contains(item));
    }
}

/**
 * Wraps a three-function FuncFilter in a CombinedBloomFilter and checks that an added
 * string is reported as present while a different string is not.
 */
@Test
public void combinedMultiHashTest() {
    final String added = "123123WASD-WASD";
    final CombinedBloomFilter filter = new CombinedBloomFilter(
        FuncFilter.of(SIZE,
            HashUtil::bkdrHash,
            HashUtil::apHash,
            HashUtil::djbHash
        )
    );

    filter.add(added);

    Assertions.assertTrue(filter.contains(added));
    Assertions.assertFalse(filter.contains("123123WASD-WASD-false"));
}

/**
 * Adds three Chinese strings to a three-function filter and checks membership for
 * the added strings and for several strings that were never added (altered
 * characters, a substring, the empty string and an emoji).
 */
@Test
public void chineseStringWithThreeHashesTest() {
    final FuncFilter filter = FuncFilter.of(SIZE,
        HashUtil::bkdrHash,
        HashUtil::apHash,
        HashUtil::djbHash
    );

    final String[] added = {"你好世界", "双亲委派", "测试工程师"};
    for (final String item : added) {
        filter.add(item);
    }

    for (final String item : added) {
        Assertions.assertTrue(filter.contains(item), "应包含: " + item);
    }

    // None of the following strings were added to the filter.
    Assertions.assertFalse(filter.contains("我好世界"), "多字");
    Assertions.assertFalse(filter.contains("父亲委派"), "改字");
    Assertions.assertFalse(filter.contains("测试"), "子串");
    Assertions.assertFalse(filter.contains(""), "空串");
    Assertions.assertFalse(filter.contains("👍"), "未添加的");
}
}