8
8
@testset " SignALSH tests" begin
9
9
Random. seed! (RANDOM_SEED)
10
10
11
@testset " Construct SignALSH" begin
    # Construction with only the required `maxnorm` keyword; the assertions
    # below pin the defaults (one hash function, Float32 element type).
    hashfn = SignALSH(; maxnorm=1)
    @test n_hashes(hashfn) == 1
    @test isa(hashfn, SignALSH{Float32})
    @test isa(hashfn, LSH.AsymmetricLSHFunction)
    @test hashtype(hashfn) == BitArray{1}

    # The positional argument sets the number of hash functions
    hashfn = SignALSH(32; maxnorm=1)
    @test n_hashes(hashfn) == 32

    # The element type is selectable through `dtype`
    hashfn = SignALSH(; dtype=Float64, maxnorm=1)
    @test isa(hashfn, SignALSH{Float64})

    # maxnorm must be specified and non-negative
    @test_throws ErrorException SignALSH()
    @test_throws ErrorException SignALSH(; maxnorm=-1)

    # m must be positive. A valid maxnorm is passed so that the only invalid
    # argument is `m`; without it the constructor already throws for the
    # missing maxnorm (see above) and these checks would pass vacuously.
    @test_throws ErrorException SignALSH(; m=-1, maxnorm=1)
    @test_throws ErrorException SignALSH(; m=0, maxnorm=1)
end
32
+
11
33
@test_skip @testset " Can hash inputs correctly with SignALSH" begin
12
34
input_length = 5
13
35
n_hashes = 8
@@ -45,22 +67,33 @@ Tests
45
67
@test simhash (Qx) == qhashes
46
68
end
47
69
48
- @testset " SignALSH generates collisions for large inner products" begin
49
- input_length = 5 ; n_hashes = 128 ;
50
- hashfn = SignALSH (n_hashes)
70
@testset " SignALSH can't hash inputs of norm > maxnorm" begin
    hashfn = SignALSH(; maxnorm=0)

    # Inputs drawn from rand(4) are expected to be rejected by both the
    # indexing and the query hash when maxnorm is zero.
    for hasher in (index_hash, query_hash)
        @test_throws ErrorException hasher(hashfn, rand(4))
    end

    # Should have no issue if norm(x) == maxnorm
    for hasher in (index_hash, query_hash)
        @test length(hasher(hashfn, zeros(4))) == 1
    end
end
54
79
55
- # Check that SignALSH isn't just generating a single query hash
56
- @test any (x_query_hashes .!= x_query_hashes[ 1 ])
80
+ @testset " SignALSH generates collisions for large inner products " begin
81
+ input_length = 5 ; n_hashes = 128 ;
57
82
58
- # Compute the indexing hashes for a dataset with four vectors:
59
- # a) 10 * x (where x is the test query vector)
83
+ # Compare a random vector x against four other vectors:
84
+ # a) 10 * x
60
85
# b) x
61
86
# c) A vector of all zeros
62
87
# d) -x
63
- dataset = [(10 * x) x zeros (input_length) - x]
88
+ x = randn (input_length)
89
+ x2, x3, x4 = 10 * x, zero (x), - x
90
+
91
+ maxnorm = (x, x2, x3, x4) .| > norm |> maximum
92
+ hashfn = SignALSH (n_hashes; maxnorm= maxnorm)
93
+
94
+ x_query_hashes = query_hash (hashfn, x)
95
+
96
+ dataset = [x2 x x3 x4]
64
97
p_hashes = index_hash (hashfn, dataset)
65
98
66
99
# Each collection of hashes should be different from one another
85
118
n_inputs = 150
86
119
n_hashes = 2
87
120
88
- hashfn = SignALSH (n_hashes)
121
+ hashfn = SignALSH (n_hashes; maxnorm = 4 * input_size )
89
122
x = sprandn (input_size, n_inputs, 0.2 )
90
123
91
124
# Mostly just need to test that the following lines don't crash
@@ -100,7 +133,7 @@ Tests
100
133
input_size = 100
101
134
n_inputs = 150
102
135
n_hashes = 2
103
- hashfn = SignALSH (n_hashes)
136
+ hashfn = SignALSH (n_hashes; maxnorm = 4 * input_size )
104
137
105
138
# # Test 1: regular matrix adjoint
106
139
x = randn (n_inputs, input_size)'
@@ -115,7 +148,7 @@ Tests
115
148
116
149
@testset " Hash inputs of different sizes" begin
117
150
n_hashes = 42
118
- hashfn = SignALSH (n_hashes)
151
+ hashfn = SignALSH (n_hashes; maxnorm = 100 )
119
152
120
153
@test size (hashfn. coeff_A) == (n_hashes, 0 )
121
154
@@ -140,7 +173,7 @@ Tests
140
173
141
174
@testset " Hash inputs of different sizes with resize_pow2 = true" begin
142
175
n_hashes = 25
143
- hashfn = SignALSH (n_hashes; resize_pow2= true )
176
+ hashfn = SignALSH (n_hashes; maxnorm = 100 , resize_pow2= true )
144
177
145
178
@test size (hashfn. coeff_A) == (n_hashes, 0 )
146
179
0 commit comments