Skip to content

Commit cc2b94f

Browse files
authored
Merge pull request nmslib#284 from marekhanus/feature/pep
Feature/pep
2 parents cd6c0fc + 8481a4b commit cc2b94f

File tree

6 files changed

+83
-83
lines changed

6 files changed

+83
-83
lines changed

examples/pyw_hnswlib.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ def __init__(self, space, dim):
1111
self.dict_labels = {}
1212
self.cur_ind = 0
1313

14-
def init_index(self, max_elements, ef_construction = 200, M = 16):
15-
self.index.init_index(max_elements = max_elements, ef_construction = ef_construction, M = M)
14+
def init_index(self, max_elements, ef_construction=200, M=16):
15+
self.index.init_index(max_elements=max_elements, ef_construction=ef_construction, M=M)
1616

1717
def add_items(self, data, ids=None):
1818
if ids is not None:

python_bindings/tests/bindings_test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@ def testRandomSelf(self):
1818
# Declaring index
1919
p = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip
2020

21-
# Initing index
21+
# Initiating index
2222
# max_elements - the maximum number of elements, should be known beforehand
2323
# (probably will be made optional in the future)
2424
#
2525
# ef_construction - controls index search speed/build speed tradeoff
2626
# M - is tightly connected with internal dimensionality of the data
27-
# stronlgy affects the memory consumption
27+
# strongly affects the memory consumption
2828

29-
p.init_index(max_elements = num_elements, ef_construction = 100, M = 16)
29+
p.init_index(max_elements=num_elements, ef_construction=100, M=16)
3030

3131
# Controlling the recall by setting ef:
3232
# higher ef leads to better accuracy, but slower search
@@ -51,7 +51,7 @@ def testRandomSelf(self):
5151
p.save_index(index_path)
5252
del p
5353

54-
# Reiniting, loading the index
54+
# Re-initiating, loading the index
5555
p = hnswlib.Index(space='l2', dim=dim) # you can change the sa
5656

5757
print("\nLoading index from '%s'\n" % index_path)

python_bindings/tests/bindings_test_getdata.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,13 @@ def testGettingItems(self):
1919
# Declaring index
2020
p = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip
2121

22-
# Initing index
22+
# Initiating index
2323
# max_elements - the maximum number of elements, should be known beforehand
2424
# (probably will be made optional in the future)
2525
#
2626
# ef_construction - controls index search speed/build speed tradeoff
2727
# M - is tightly connected with internal dimensionality of the data
28-
# stronlgy affects the memory consumption
28+
# strongly affects the memory consumption
2929

3030
p.init_index(max_elements=num_elements, ef_construction=100, M=16)
3131

python_bindings/tests/bindings_test_labels.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,13 @@ def testRandomSelf(self):
2121
# Declaring index
2222
p = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip
2323

24-
# Initing index
24+
# Initiating index
2525
# max_elements - the maximum number of elements, should be known beforehand
2626
# (probably will be made optional in the future)
2727
#
2828
# ef_construction - controls index search speed/build speed tradeoff
2929
# M - is tightly connected with internal dimensionality of the data
30-
# stronlgy affects the memory consumption
30+
# strongly affects the memory consumption
3131

3232
p.init_index(max_elements=num_elements, ef_construction=100, M=16)
3333

@@ -47,7 +47,7 @@ def testRandomSelf(self):
4747
# Query the elements for themselves and measure recall:
4848
labels, distances = p.knn_query(data1, k=1)
4949

50-
items=p.get_items(labels)
50+
items = p.get_items(labels)
5151

5252
# Check the recall:
5353
self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data1))), 1.0, 3)
@@ -67,8 +67,8 @@ def testRandomSelf(self):
6767
print("Deleted")
6868

6969
print("\n**** Mark delete test ****\n")
70-
# Reiniting, loading the index
71-
print("Reiniting")
70+
# Re-initiating, loading the index
71+
print("Re-initiating")
7272
p = hnswlib.Index(space='l2', dim=dim)
7373

7474
print("\nLoading index from '%s'\n" % index_path)
@@ -80,17 +80,17 @@ def testRandomSelf(self):
8080

8181
# Query the elements for themselves and measure recall:
8282
labels, distances = p.knn_query(data, k=1)
83-
items=p.get_items(labels)
83+
items = p.get_items(labels)
8484

8585
# Check the recall:
8686
self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data))), 1.0, 3)
8787

8888
# Check that the returned element data is correct:
89-
diff_with_gt_labels=np.mean(np.abs(data-items))
89+
diff_with_gt_labels = np.mean(np.abs(data-items))
9090
self.assertAlmostEqual(diff_with_gt_labels, 0, delta=1e-4) # deleting index.
9191

9292
# Checking that all labels are returned correctly:
93-
sorted_labels=sorted(p.get_ids_list())
93+
sorted_labels = sorted(p.get_ids_list())
9494
self.assertEqual(np.sum(~np.asarray(sorted_labels) == np.asarray(range(num_elements))), 0)
9595

9696
# Delete data1

python_bindings/tests/bindings_test_pickle.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -60,38 +60,38 @@ def test_space_main(self, space, dim):
6060

6161
p.num_threads = self.num_threads # by default using all available cores
6262

63-
p0 = pickle.loads(pickle.dumps(p)) ### pickle un-initialized Index
63+
p0 = pickle.loads(pickle.dumps(p)) # pickle un-initialized Index
6464
p.init_index(max_elements=self.num_elements, ef_construction=self.ef_construction, M=self.M)
6565
p0.init_index(max_elements=self.num_elements, ef_construction=self.ef_construction, M=self.M)
6666

6767
p.ef = self.ef
6868
p0.ef = self.ef
6969

70-
p1 = pickle.loads(pickle.dumps(p)) ### pickle Index before adding items
70+
p1 = pickle.loads(pickle.dumps(p)) # pickle Index before adding items
7171

72-
### add items to ann index p,p0,p1
72+
# add items to ann index p,p0,p1
7373
p.add_items(data)
7474
p1.add_items(data)
7575
p0.add_items(data)
7676

77-
p2=pickle.loads(pickle.dumps(p)) ### pickle Index before adding items
77+
p2=pickle.loads(pickle.dumps(p)) # pickle Index after adding items
7878

7979
self.assertTrue(np.allclose(p.get_items(), p0.get_items()), "items for p and p0 must be same")
8080
self.assertTrue(np.allclose(p0.get_items(), p1.get_items()), "items for p0 and p1 must be same")
8181
self.assertTrue(np.allclose(p1.get_items(), p2.get_items()), "items for p1 and p2 must be same")
8282

83-
### Test if returned distances are same
83+
# Test if returned distances are same
8484
l, d = p.knn_query(test_data, k=self.k)
8585
l0, d0 = p0.knn_query(test_data, k=self.k)
8686
l1, d1 = p1.knn_query(test_data, k=self.k)
8787
l2, d2 = p2.knn_query(test_data, k=self.k)
8888

89-
self.assertLessEqual(np.sum(((d-d0)**2.)>1e-3), self.dists_err_thresh, msg=f"knn distances returned by p and p0 must match")
90-
self.assertLessEqual(np.sum(((d0-d1)**2.)>1e-3), self.dists_err_thresh, msg=f"knn distances returned by p0 and p1 must match")
91-
self.assertLessEqual(np.sum(((d1-d2)**2.)>1e-3), self.dists_err_thresh, msg=f"knn distances returned by p1 and p2 must match")
89+
self.assertLessEqual(np.sum(((d-d0)**2.) > 1e-3), self.dists_err_thresh, msg=f"knn distances returned by p and p0 must match")
90+
self.assertLessEqual(np.sum(((d0-d1)**2.) > 1e-3), self.dists_err_thresh, msg=f"knn distances returned by p0 and p1 must match")
91+
self.assertLessEqual(np.sum(((d1-d2)**2.) > 1e-3), self.dists_err_thresh, msg=f"knn distances returned by p1 and p2 must match")
9292

93-
### check if ann results match brute-force search
94-
### allow for 2 labels to be missing from ann results
93+
# check if ann results match brute-force search
94+
# allow for 2 labels to be missing from ann results
9595
check_ann_results(self, space, data, test_data, self.k, l, d,
9696
err_thresh=self.label_err_thresh,
9797
total_thresh=self.item_err_thresh,
@@ -102,19 +102,19 @@ def test_space_main(self, space, dim):
102102
total_thresh=self.item_err_thresh,
103103
dists_thresh=self.dists_err_thresh)
104104

105-
### Check ef parameter value
105+
# Check ef parameter value
106106
self.assertEqual(p.ef, self.ef, "incorrect value of p.ef")
107107
self.assertEqual(p0.ef, self.ef, "incorrect value of p0.ef")
108108
self.assertEqual(p2.ef, self.ef, "incorrect value of p2.ef")
109109
self.assertEqual(p1.ef, self.ef, "incorrect value of p1.ef")
110110

111-
### Check M parameter value
111+
# Check M parameter value
112112
self.assertEqual(p.M, self.M, "incorrect value of p.M")
113113
self.assertEqual(p0.M, self.M, "incorrect value of p0.M")
114114
self.assertEqual(p1.M, self.M, "incorrect value of p1.M")
115115
self.assertEqual(p2.M, self.M, "incorrect value of p2.M")
116116

117-
### Check ef_construction parameter value
117+
# Check ef_construction parameter value
118118
self.assertEqual(p.ef_construction, self.ef_construction, "incorrect value of p.ef_construction")
119119
self.assertEqual(p0.ef_construction, self.ef_construction, "incorrect value of p0.ef_construction")
120120
self.assertEqual(p1.ef_construction, self.ef_construction, "incorrect value of p1.ef_construction")
@@ -135,12 +135,12 @@ def setUp(self):
135135
self.num_threads = 4
136136
self.k = 25
137137

138-
self.label_err_thresh = 5 ### max number of missing labels allowed per test item
139-
self.item_err_thresh = 5 ### max number of items allowed with incorrect labels
138+
self.label_err_thresh = 5 # max number of missing labels allowed per test item
139+
self.item_err_thresh = 5 # max number of items allowed with incorrect labels
140140

141-
self.dists_err_thresh = 50 ### for two matrices, d1 and d2, dists_err_thresh controls max
142-
### number of value pairs that are allowed to be different in d1 and d2
143-
### i.e., number of values that are (d1-d2)**2>1e-3
141+
self.dists_err_thresh = 50 # for two matrices, d1 and d2, dists_err_thresh controls max
142+
# number of value pairs that are allowed to be different in d1 and d2
143+
# i.e., number of values that are (d1-d2)**2>1e-3
144144

145145
def test_inner_product_space(self):
146146
test_space_main(self, 'ip', 48)

python_bindings/tests/bindings_test_resize.py

Lines changed: 49 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -7,71 +7,71 @@
77

88
class RandomSelfTestCase(unittest.TestCase):
99
def testRandomSelf(self):
10-
for idx in range(16):
11-
print("\n**** Index resize test ****\n")
10+
for idx in range(16):
11+
print("\n**** Index resize test ****\n")
1212

13-
np.random.seed(idx)
14-
dim = 16
15-
num_elements = 10000
13+
np.random.seed(idx)
14+
dim = 16
15+
num_elements = 10000
1616

17-
# Generating sample data
18-
data = np.float32(np.random.random((num_elements, dim)))
17+
# Generating sample data
18+
data = np.float32(np.random.random((num_elements, dim)))
1919

20-
# Declaring index
21-
p = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip
20+
# Declaring index
21+
p = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip
2222

23-
# Initing index
24-
# max_elements - the maximum number of elements, should be known beforehand
25-
# (probably will be made optional in the future)
26-
#
27-
# ef_construction - controls index search speed/build speed tradeoff
28-
# M - is tightly connected with internal dimensionality of the data
29-
# stronlgy affects the memory consumption
23+
# Initiating index
24+
# max_elements - the maximum number of elements, should be known beforehand
25+
# (probably will be made optional in the future)
26+
#
27+
# ef_construction - controls index search speed/build speed tradeoff
28+
# M - is tightly connected with internal dimensionality of the data
29+
# strongly affects the memory consumption
3030

31-
p.init_index(max_elements=num_elements//2, ef_construction=100, M=16)
31+
p.init_index(max_elements=num_elements//2, ef_construction=100, M=16)
3232

33-
# Controlling the recall by setting ef:
34-
# higher ef leads to better accuracy, but slower search
35-
p.set_ef(20)
33+
# Controlling the recall by setting ef:
34+
# higher ef leads to better accuracy, but slower search
35+
p.set_ef(20)
3636

37-
p.set_num_threads(idx%8) # by default using all available cores
37+
p.set_num_threads(idx % 8) # by default using all available cores
3838

39-
# We split the data in two batches:
40-
data1 = data[:num_elements // 2]
41-
data2 = data[num_elements // 2:]
39+
# We split the data in two batches:
40+
data1 = data[:num_elements // 2]
41+
data2 = data[num_elements // 2:]
4242

43-
print("Adding first batch of %d elements" % (len(data1)))
44-
p.add_items(data1)
43+
print("Adding first batch of %d elements" % (len(data1)))
44+
p.add_items(data1)
4545

46-
# Query the elements for themselves and measure recall:
47-
labels, distances = p.knn_query(data1, k=1)
46+
# Query the elements for themselves and measure recall:
47+
labels, distances = p.knn_query(data1, k=1)
4848

49-
items = p.get_items(list(range(len(data1))))
49+
items = p.get_items(list(range(len(data1))))
5050

51-
# Check the recall:
52-
self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data1))), 1.0, 3)
51+
# Check the recall:
52+
self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data1))), 1.0, 3)
5353

54-
# Check that the returned element data is correct:
55-
diff_with_gt_labels = np.max(np.abs(data1-items))
56-
self.assertAlmostEqual(diff_with_gt_labels, 0, delta=1e-4)
54+
# Check that the returned element data is correct:
55+
diff_with_gt_labels = np.max(np.abs(data1-items))
56+
self.assertAlmostEqual(diff_with_gt_labels, 0, delta=1e-4)
5757

58-
print("Resizing the index")
59-
p.resize_index(num_elements)
58+
print("Resizing the index")
59+
p.resize_index(num_elements)
6060

61-
print("Adding the second batch of %d elements" % (len(data2)))
62-
p.add_items(data2)
61+
print("Adding the second batch of %d elements" % (len(data2)))
62+
p.add_items(data2)
6363

64-
# Query the elements for themselves and measure recall:
65-
labels, distances = p.knn_query(data, k=1)
66-
items=p.get_items(list(range(num_elements)))
64+
# Query the elements for themselves and measure recall:
65+
labels, distances = p.knn_query(data, k=1)
66+
items=p.get_items(list(range(num_elements)))
6767

68-
# Check the recall:
69-
self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data))), 1.0, 3)
68+
# Check the recall:
69+
self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data))), 1.0, 3)
7070

71-
# Check that the returned element data is correct:
72-
diff_with_gt_labels=np.max(np.abs(data-items))
73-
self.assertAlmostEqual(diff_with_gt_labels, 0, delta=1e-4)
71+
# Check that the returned element data is correct:
72+
diff_with_gt_labels = np.max(np.abs(data-items))
73+
self.assertAlmostEqual(diff_with_gt_labels, 0, delta=1e-4)
7474

75-
# Checking that all labels are returned correcly:
76-
sorted_labels=sorted(p.get_ids_list())
77-
self.assertEqual(np.sum(~np.asarray(sorted_labels) == np.asarray(range(num_elements))), 0)
75+
# Checking that all labels are returned correctly:
76+
sorted_labels = sorted(p.get_ids_list())
77+
self.assertEqual(np.sum(~np.asarray(sorted_labels) == np.asarray(range(num_elements))), 0)

0 commit comments

Comments
 (0)