Skip to content

Commit 5ddd327

Browse files
committed
Speed up password filter generation
1 parent: e777a24 · commit: 5ddd327

File tree

5 files changed

+43 -47 lines changed

5 files changed

+43 -47 lines changed

src/main/java/org/fastfilter/tools/BuildFilterFile.java

Lines changed: 28 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -15,13 +15,14 @@
1515

1616
public class BuildFilterFile {
1717

18-
public static final int SEGMENT_BITS = 4;
18+
public static final int SEGMENT_BITS = 10;
1919

2020
public static void main(String... args) throws IOException {
2121
if (args.length != 1) {
2222
System.out.println("Usage: java " + BuildFilterFile.class.getName() + " <textFile>\n"
23-
+ "Builds a .filter file from a text file that contains SHA-1 hashes and counts.");
24-
// see also https://haveibeenpwned.com/passwords
23+
+ "Builds a .filter file from a text file that contains SHA-1 hashes and counts.\n"
24+
+ "You can get the hash file from https://haveibeenpwned.com/passwords\n"
25+
+ "It needs to be a list of SHA-1 hashes, ordered by hash, line format <hash>:<count>.");
2526
return;
2627
}
2728
String textFile = args[0];
@@ -32,6 +33,7 @@ public static void main(String... args) throws IOException {
3233
new File(filterFileName).delete();
3334
RandomAccessFile out = new RandomAccessFile(filterFileName, "rw");
3435
int lines = 0;
36+
long[] segmentStarts = new long[1 << SEGMENT_BITS];
3537
// header
3638
out.write(new byte[8 << SEGMENT_BITS]);
3739
int currentSegment = 0;
@@ -45,49 +47,54 @@ public static void main(String... args) throws IOException {
4547
lines++;
4648
long hash = 0;
4749
for (int i = 0; i < 16; i++) {
48-
hash <<= 4;
49-
hash |= StringUtils.getHex(line.charAt(i));
50+
hash = (hash << 4) | StringUtils.getHex(line.charAt(i));
5051
}
5152
if (lastHash == hash) {
52-
System.out.println("Warning: duplicate hash detected, ignoring: " + line);
53+
System.out.println("Warning: duplicate 64-bit key detected, ignoring: " + line);
5354
continue;
55+
} else if (Long.compareUnsigned(hash, lastHash) < 0) {
56+
throw new IllegalArgumentException("The file is not sorted by hash");
5457
}
5558
lastHash = hash;
5659
int dot = line.lastIndexOf(':');
5760
int count = Integer.parseInt(line.substring(dot + 1), 10);
58-
// set the lowest bit to 0
61+
// clear the lowest bit
5962
long key = hash ^ (hash & 1);
6063
// if common, set the lowest bit
6164
if (count > 9) {
6265
key |= 1;
6366
}
6467
int segment = (int) (key >>> (64 - SEGMENT_BITS));
6568
if (segment != currentSegment) {
66-
writeSegment(keys, currentSegment, out);
67-
long time = System.nanoTime() - start;
68-
System.out.println("Lines processed: " + lines + " " + (time / lines) + " ns/line");
69+
segmentStarts[currentSegment] = out.getFilePointer();
70+
out.write(getSegment(keys));
71+
keys.clear();
6972
currentSegment = segment;
7073
}
74+
if (lines % 10000000 == 0) {
75+
long time = System.nanoTime() - start;
76+
System.out.println(lines / 1000000 + " million lines processed, " + (time / lines) + " ns/line");
77+
}
7178
keys.add(key);
7279
}
73-
writeSegment(keys, currentSegment, out);
80+
segmentStarts[currentSegment] = out.getFilePointer();
81+
out.write(getSegment(keys));
7482
lineReader.close();
83+
out.seek(0);
84+
for(long s : segmentStarts) {
85+
out.writeLong(s);
86+
}
7587
out.close();
88+
long time = System.nanoTime() - start;
89+
System.out.println(lines + " lines processed, " + (time / 1000000 / 1000) + " seconds");
7690
}
7791

78-
private static void writeSegment(ArrayList<Long> keys, int segment,
79-
RandomAccessFile out) throws IOException {
92+
private static byte[] getSegment(ArrayList<Long> keys) {
8093
long[] array = new long[keys.size()];
81-
for(int i=0; i<keys.size(); i++) {
94+
for (int i = 0; i < keys.size(); i++) {
8295
array[i] = keys.get(i);
8396
}
84-
long start = out.length();
85-
out.seek(segment * 8);
86-
out.writeLong(start);
87-
out.seek(start);
88-
XorPlus8 filter = XorPlus8.construct(array);
89-
out.write(filter.getData());
90-
keys.clear();
97+
return XorPlus8.construct(array).getData();
9198
}
9299

93100
}

src/main/java/org/fastfilter/tools/PasswordLookup.java

Lines changed: 7 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ public class PasswordLookup {
1515

1616
public static void main(String... args) throws Exception {
1717
if (args.length != 1) {
18-
System.out.println("Usage: java " + PasswordLookup.class.getName() + " <filterFileName>\n"
18+
System.out.println("Usage: java " + PasswordLookup.class.getName() + " <filterFileName> \n"
1919
+ "Requires a filter file generated by " + BuildFilterFile.class.getName());
2020
return;
2121
}
@@ -39,7 +39,8 @@ public static void main(String... args) throws Exception {
3939
}
4040

4141
private static void testPassword(String filterFileName, String password) throws Exception {
42-
byte[] passwordBytes = password.getBytes(Charset.forName("ISO-8859-1"));
42+
// it's unclear which character set was used; ASCII gave good results, as umlauts are converted to '?'
43+
byte[] passwordBytes = password.getBytes(Charset.forName("ASCII"));
4344
MessageDigest md = MessageDigest.getInstance("SHA-1");
4445
byte[] sha1 = md.digest(passwordBytes);
4546
long hash = 0;
@@ -63,16 +64,12 @@ private static void testPassword(String filterFileName, String password) throws
6364
}
6465
XorPlus8 filter = new XorPlus8(in);
6566
in.close();
66-
boolean found = filter.mayContain(key);
67-
if (found) {
67+
if (filter.mayContain(key)) {
6868
System.out.println("Found");
69+
} else if (filter.mayContain(key | 1)) {
70+
System.out.println("Found; common");
6971
} else {
70-
found = filter.mayContain(key | 1);
71-
if (found) {
72-
System.out.println("Found; common");
73-
} else {
74-
System.out.println("Not found");
75-
}
72+
System.out.println("Not found");
7673
}
7774
}
7875

src/main/java/org/fastfilter/xor/Xor8.java

Lines changed: 4 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -171,11 +171,7 @@ public byte[] getData() {
171171
ByteArrayOutputStream out = new ByteArrayOutputStream();
172172
DataOutputStream d = new DataOutputStream(out);
173173
d.writeInt(size);
174-
d.writeInt(arrayLength);
175-
d.writeInt(blockLength);
176174
d.writeLong(seed);
177-
d.writeInt(bitCount);
178-
d.writeInt(fingerprints.length);
179175
d.write(fingerprints);
180176
return out.toByteArray();
181177
} catch (IOException e) {
@@ -187,12 +183,11 @@ public Xor8(InputStream in) {
187183
try {
188184
DataInputStream din = new DataInputStream(in);
189185
size = din.readInt();
190-
arrayLength = din.readInt();
191-
blockLength = din.readInt();
186+
arrayLength = getArrayLength(size);
187+
bitCount = arrayLength * BITS_PER_FINGERPRINT;
188+
blockLength = arrayLength / HASHES;
192189
seed = din.readLong();
193-
bitCount = din.readInt();
194-
int fingerprintLength = din.readInt();
195-
fingerprints = new byte[fingerprintLength];
190+
fingerprints = new byte[arrayLength];
196191
din.readFully(fingerprints);
197192
} catch (IOException e) {
198193
throw new RuntimeException(e);

src/main/java/org/fastfilter/xorplus/Rank9.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ public Rank9(BitSet set, long bitCount) {
4545
this.bits = bits;
4646
long length = bits.length * 64;
4747
int numWords = (int) ((length + 63) / 64);
48-
final int numCounts = (int) ((length + 8 * 64 - 1) / (8 * 64)) * 2;
48+
int numCounts = (int) ((length + 8 * 64 - 1) / (8 * 64)) * 2;
4949
counts = new long[numCounts + 1];
5050
long c = 0;
5151
int pos = 0;

src/main/java/org/fastfilter/xorplus/XorPlus8.java

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -349,10 +349,7 @@ public byte[] getData() {
349349
ByteArrayOutputStream out = new ByteArrayOutputStream();
350350
DataOutputStream d = new DataOutputStream(out);
351351
d.writeInt(size);
352-
d.writeInt(arrayLength);
353-
d.writeInt(blockLength);
354352
d.writeLong(seed);
355-
d.writeInt(bitCount);
356353
d.writeInt(fingerprints.length);
357354
d.write(fingerprints);
358355
rank.write(d);
@@ -366,10 +363,10 @@ public XorPlus8(InputStream in) {
366363
try {
367364
DataInputStream din = new DataInputStream(in);
368365
size = din.readInt();
369-
arrayLength = din.readInt();
370-
blockLength = din.readInt();
366+
arrayLength = getArrayLength(size);
367+
bitCount = arrayLength * BITS_PER_FINGERPRINT;
368+
blockLength = arrayLength / HASHES;
371369
seed = din.readLong();
372-
bitCount = din.readInt();
373370
int fingerprintLength = din.readInt();
374371
fingerprints = new byte[fingerprintLength];
375372
din.readFully(fingerprints);

0 commit comments

Comments
 (0)