@@ -12,38 +12,48 @@ Tests
12
12
import SparseArrays: sprandn
13
13
14
14
@testset " Can construct a simple MIPSHash" begin
15
- hashfn = MIPSHash ()
15
+ hashfn = MIPSHash (; maxnorm = 1 )
16
16
17
17
@test n_hashes (hashfn) == 1
18
18
@test hashtype (hashfn) == Vector{Int32}
19
19
@test isa (hashfn, MIPSHash{Float32}) # Default dtype should be Float32
20
20
@test isa (hashfn, LSH. AsymmetricLSHFunction)
21
21
22
22
# #
23
- hashfn = MIPSHash (12 )
23
+ hashfn = MIPSHash (12 ; maxnorm = 1 )
24
24
25
25
@test n_hashes (hashfn) == 12
26
26
27
27
# #
28
- hashfn = MIPSHash (; dtype= Float64)
28
+ hashfn = MIPSHash (; dtype= Float64, maxnorm = 1 )
29
29
30
30
@test isa (hashfn, MIPSHash{Float64})
31
31
32
32
# #
33
- hashfn = MIPSHash {Float64} ()
33
+ hashfn = MIPSHash {Float64} (; maxnorm = 1 )
34
34
@test isa (hashfn, MIPSHash{Float64})
35
35
36
36
# ## Invalid hash function construction
37
-
38
- @test_throws ErrorException MIPSHash (- 1 )
39
- @test_throws ErrorException MIPSHash (; m= - 1 )
40
- @test_throws ErrorException MIPSHash (; m= 0 )
41
- @test_throws ErrorException MIPSHash (; scale= - 1 )
42
- @test_throws ErrorException MIPSHash (; scale= 0 )
37
+ # Non-positive number of hash functions
38
+ @test_throws ErrorException MIPSHash (- 1 ; maxnorm= 1 )
39
+ @test_throws ErrorException MIPSHash ( 0 ; maxnorm= 1 )
40
+
41
+ # Non-positive m
42
+ @test_throws ErrorException MIPSHash (; m = - 1 , maxnorm= 1 )
43
+ @test_throws ErrorException MIPSHash (; m = 0 , maxnorm= 1 )
44
+
45
+ # Non-positive scale factor
46
+ @test_throws ErrorException MIPSHash (; scale = - 1 , maxnorm= 1 )
47
+ @test_throws ErrorException MIPSHash (; scale = 0 , maxnorm= 1 )
48
+
49
+ # maxnorm not specified or non-positive
50
+ @test_throws ErrorException MIPSHash ()
51
+ @test_throws ErrorException MIPSHash (; maxnorm= - 1 )
52
+ @test_throws ErrorException MIPSHash (; maxnorm= 0 )
43
53
end
44
54
45
55
@testset " Hashing returns the correct data types" begin
46
- hashfn = MIPSHash {Float64} (; scale= 1 , m= 3 )
56
+ hashfn = MIPSHash {Float64} (; maxnorm = 20 , scale= 1 , m= 3 )
47
57
48
58
# Matrix{Float64} -> Matrix{Int32}
49
59
x = randn (4 , 10 )
@@ -66,14 +76,16 @@ Tests
66
76
67
77
@testset " MIPSHash h(P(x)) is correctly computed" begin
68
78
n_hashes = 128
69
- scale = 0.5
70
- m = 3
71
- hashfn = MIPSHash (n_hashes; scale= scale, m= m)
79
+ scale = 0.5
80
+ m = 3
81
+ x = randn (20 )
82
+ maxnorm = 2 * norm (x)
83
+
84
+ hashfn = MIPSHash (n_hashes; maxnorm= maxnorm, scale= scale, m= m)
72
85
73
86
@test size (hashfn. coeff_B) == (n_hashes, 3 )
74
87
@test size (hashfn. shift) == (n_hashes,)
75
88
76
- x = randn (20 )
77
89
hash = index_hash (hashfn, x)
78
90
79
91
@test isa (hash, Vector{Int32})
87
99
# ## Compute hash manually
88
100
# Start by performing the transform P(x)
89
101
coeff = [hashfn. coeff_A hashfn. coeff_B]
90
- u = x / norm (x)
102
+ u = x / maxnorm
91
103
norm_powers = [norm (u)^ 2 , norm (u)^ 4 , norm (u)^ 8 ]
92
104
Px = [u; norm_powers]
93
105
@@ -100,14 +112,16 @@ Tests
100
112
101
113
@testset " MIPSHash h(Q(x)) is correctly computed" begin
102
114
n_hashes = 128
103
- scale = 0.5
104
- m = 3
105
- hashfn = MIPSHash (n_hashes; scale= scale, m= m)
115
+ scale = 0.5
116
+ m = 3
117
+ x = randn (20 )
118
+ maxnorm = 2 * norm (x)
119
+
120
+ hashfn = MIPSHash (n_hashes; maxnorm= maxnorm, scale= scale, m= m)
106
121
107
122
@test size (hashfn. coeff_B) == (n_hashes, m)
108
123
@test size (hashfn. shift) == (n_hashes,)
109
124
110
- x = randn (40 )
111
125
hash = query_hash (hashfn, x)
112
126
113
127
@test isa (hash, Vector{Int32})
@@ -135,7 +149,7 @@ Tests
135
149
136
150
@testset " Hash inputs of different sizes" begin
137
151
n_hashes = 16
138
- hashfn = MIPSHash (n_hashes)
152
+ hashfn = MIPSHash (n_hashes; maxnorm = 1000 )
139
153
140
154
index_hash (hashfn, rand (10 ))
141
155
@test size (hashfn. coeff_A) == (n_hashes, 10 )
@@ -157,7 +171,7 @@ Tests
157
171
end
158
172
159
173
@testset " resize_pow2 increases number of coefficients to powers of 2" begin
160
- hashfn = MIPSHash (10 ; resize_pow2= true )
174
+ hashfn = MIPSHash (10 ; maxnorm = 1000 , resize_pow2= true )
161
175
@test size (hashfn. coeff_A) == (10 , 0 )
162
176
163
177
index_hash (hashfn, rand (3 ))
@@ -174,45 +188,40 @@ Tests
174
188
end
175
189
176
190
@testset " MIPSHash generates collisions for large inner products" begin
177
- n_hashes = 256
178
- scale = 1
179
- m = 5
180
- hashfn = MIPSHash (n_hashes; scale= scale, m= m)
181
-
182
- x = randn (20 )
183
- x_query_hashes = query_hash (hashfn, x)
184
-
185
- # Check that MIPSHash isn't just generating a single query hash
186
- @test any (x_query_hashes .!= x_query_hashes[1 ])
191
+ input_length = 5 ; n_hashes = 128 ;
187
192
188
- # Compute the indexing hashes for a dataset with four vectors:
189
- # a) 10 * x (where x is the test query vector)
193
+ # Compare a random vector x against four other vectors:
194
+ # a) 10 * x
190
195
# b) x
191
196
# c) A vector of all zeros
192
197
# d) -x
193
- dataset = [(10 * x) x zero (x) - x]
198
+ x = randn (input_length)
199
+ x2, x3, x4 = 10 * x, zero (x), - x
200
+
201
+ maxnorm = (x, x2, x3, x4) .| > norm |> maximum
202
+ hashfn = MIPSHash (n_hashes; maxnorm= maxnorm)
203
+
204
+ x_query_hashes = query_hash (hashfn, x)
205
+
206
+ dataset = [x2 x x3 x4]
194
207
p_hashes = index_hash (hashfn, dataset)
195
208
196
209
# Each collection of hashes should be different from one another
197
210
@test let result = true
198
- for (ii,jj) in product (1 : 4 , 1 : 4 )
199
- if ii != jj && p_hashes[:,ii] == p_hashes[:,jj]
200
- result = false
201
- break
202
- end
203
- end
204
- result
211
+ for (ii,jj) in Iterators . product (1 : 4 , 1 : 4 )
212
+ if ii != jj && p_hashes[:,ii] == p_hashes[:,jj]
213
+ result = false
214
+ break
215
+ end
216
+ end
217
+ result
205
218
end
206
-
207
- # The number of collisions should be highest for x and 2*x, second-highest
208
- # for x and x, second-lowest for x and zeros, and lowest for x and -x
209
- n_collisions = [sum (x_query_hashes .== p) for p in eachcol (p_hashes)]
210
- @test n_collisions[1 ] > n_collisions[2 ] > n_collisions[3 ] > n_collisions[4 ]
211
219
end
212
220
213
221
@testset " Can compute hashes for sparse arrays" begin
214
222
X = sprandn (Float32, 10 , 1000 , 0.2 )
215
- hashfn = MIPSHash (8 ; scale= 1 , m= 1 )
223
+ maxnorm = X |> eachcol .| > norm |> maximum
224
+ hashfn = MIPSHash (8 ; maxnorm= maxnorm, scale= 1 , m= 1 )
216
225
217
226
ihashes = index_hash (hashfn, X)
218
227
qhashes = query_hash (hashfn, X)
0 commit comments