|
# Benchmark chunk: build an HNSW index over random vectors, then time batched
# k-NN self-queries and append a summary row to a per-(dim, threads) log file.
# Relies on names defined earlier in the file: args (parsed CLI), np (numpy),
# hnswlib, time.

dim = int(args.d)       # vector dimensionality
name = args.n           # label recorded in the output row
threads = int(args.t)   # thread count used for the query phase
num_elements = 400000   # index size (fixed, independent of dim)

# Generate reproducible sample data.
np.random.seed(1)
data = np.float32(np.random.random((num_elements, dim)))

# Declaring index; possible space options are l2, cosine or ip.
p = hnswlib.Index(space='l2', dim=dim)
p.init_index(max_elements=num_elements, ef_construction=60, M=16)

# Controlling the recall by setting ef:
# higher ef leads to better accuracy, but slower search.
p.set_ef(10)

# Build with many threads so construction time is not the bottleneck.
p.set_num_threads(64)
t0 = time.time()
p.add_items(data)
construction_time = time.time() - t0

# Query phase: restrict to the requested thread count.
p.set_num_threads(threads)
times = []
time.sleep(1)  # brief settle before timing
p.set_ef(15)

# The query batch is loop-invariant; slice it ONCE, outside the timed region.
# (Previously the slice was taken between t0 and tt, polluting the measurement.)
qdata = data[:5000 * threads]
recall = 0.0  # value from the last repetition is what the summary row reports
for _ in range(3):
    t0 = time.time()
    labels, distances = p.knn_query(qdata, k=1)
    tt = time.time() - t0
    times.append(tt)
    # Exact self-recall: query i should return element i, since queries are
    # drawn directly from the indexed data.
    recall = np.sum(labels.reshape(-1) == np.arange(len(qdata))) / len(qdata)
    print(f"{tt} seconds, recall= {recall}")

# Summary row: mean/median/std query time, build time, last-run recall, label.
str_out = f"{np.mean(times)}, {np.median(times)}, {np.std(times)}, {construction_time}, {recall}, {name}"
print(str_out)
with open(f"log2_{dim}_t{threads}.txt", "a") as f:
    f.write(str_out + "\n")
    f.flush()
|
0 commit comments