Skip to content

Commit d35f428

Browse files
author
Yury
committed
Add construction speed logging
1 parent dd266bc commit d35f428

File tree

2 files changed

+47 additions, -32 deletions (lines changed)

2 files changed

+47 additions, -32 deletions (lines changed)

tests/python/git_tester.py

Lines changed: 19 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -9,19 +9,29 @@
99
speedtest_copy_path = os.path.join("tests", "python", "speedtest2.py")
1010
shutil.copyfile(speedtest_src_path, speedtest_copy_path) # the file has to be outside of git
1111

12-
commits = list(Repository('.', from_tag="v0.6.0").traverse_commits())
12+
commits = list(Repository('.', from_tag="v0.6.2").traverse_commits())
1313
print("Found commits:")
1414
for idx, commit in enumerate(commits):
1515
name = commit.msg.replace('\n', ' ').replace('\r', ' ')
1616
print(idx, commit.hash, name)
1717

1818
for commit in commits:
19-
name = commit.msg.replace('\n', ' ').replace('\r', ' ')
19+
name = commit.msg.replace('\n', ' ').replace('\r', ' ').replace(",", ";")
2020
print("\nProcessing", commit.hash, name)
2121

2222
if os.path.exists("build"):
2323
shutil.rmtree("build")
2424
os.system(f"git checkout {commit.hash}")
25+
26+
# Checking we have actually switched the branch:
27+
current_commit=list(Repository('.').traverse_commits())[-1]
28+
if current_commit.hash != commit.hash:
29+
print("git checkout failed!!!!")
30+
print("git checkout failed!!!!")
31+
print("git checkout failed!!!!")
32+
print("git checkout failed!!!!")
33+
continue
34+
2535
print("\n\n--------------------\n\n")
2636
ret = os.system("python -m pip install .")
2737
print("Install result:", ret)
@@ -33,8 +43,10 @@
3343
print("build failed!!!!")
3444
continue
3545

36-
os.system(f'python {speedtest_copy_path} -n "{name}" -d 4 -t 1')
37-
os.system(f'python {speedtest_copy_path} -n "{name}" -d 64 -t 1')
38-
os.system(f'python {speedtest_copy_path} -n "{name}" -d 128 -t 1')
39-
os.system(f'python {speedtest_copy_path} -n "{name}" -d 4 -t 24')
40-
os.system(f'python {speedtest_copy_path} -n "{name}" -d 128 -t 24')
46+
# os.system(f'python {speedtest_copy_path} -n "{hash[:4]}_{name}" -d 32 -t 1')
47+
os.system(f'python {speedtest_copy_path} -n "{commit.hash[:4]}_{name}" -d 16 -t 1')
48+
os.system(f'python {speedtest_copy_path} -n "{commit.hash[:4]}_{name}" -d 16 -t 64')
49+
# os.system(f'python {speedtest_copy_path} -n "{name}" -d 64 -t 1')
50+
# os.system(f'python {speedtest_copy_path} -n "{name}" -d 128 -t 1')
51+
# os.system(f'python {speedtest_copy_path} -n "{name}" -d 4 -t 24')
52+
# os.system(f'python {speedtest_copy_path} -n "{name}" -d 128 -t 24')

tests/python/speedtest.py

Lines changed: 28 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -13,50 +13,53 @@
1313
dim = int(args.d)
1414
name = args.n
1515
threads=int(args.t)
16-
num_elements = 1000000 * 4//dim
16+
num_elements = 400000
1717

1818
# Generating sample data
1919
np.random.seed(1)
2020
data = np.float32(np.random.random((num_elements, dim)))
2121

2222

23-
index_path=f'speed_index{dim}.bin'
23+
# index_path=f'speed_index{dim}.bin'
2424
# Declaring index
2525
p = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip
2626

27-
if not os.path.isfile(index_path) :
27+
# if not os.path.isfile(index_path) :
2828

29-
p.init_index(max_elements=num_elements, ef_construction=100, M=16)
29+
p.init_index(max_elements=num_elements, ef_construction=60, M=16)
3030

31-
# Controlling the recall by setting ef:
32-
# higher ef leads to better accuracy, but slower search
33-
p.set_ef(10)
31+
# Controlling the recall by setting ef:
32+
# higher ef leads to better accuracy, but slower search
33+
p.set_ef(10)
3434

35-
# Set number of threads used during batch search/construction
36-
# By default using all available cores
37-
p.set_num_threads(12)
35+
# Set number of threads used during batch search/construction
36+
# By default using all available cores
37+
p.set_num_threads(64)
38+
t0=time.time()
39+
p.add_items(data)
40+
construction_time=time.time()-t0
41+
# Serializing and deleting the index:
3842

39-
p.add_items(data)
40-
41-
# Serializing and deleting the index:
42-
43-
print("Saving index to '%s'" % index_path)
44-
p.save_index(index_path)
43+
# print("Saving index to '%s'" % index_path)
44+
# p.save_index(index_path)
4545
p.set_num_threads(threads)
4646
times=[]
47-
time.sleep(10)
48-
p.set_ef(100)
49-
for _ in range(3):
50-
p.load_index(index_path)
51-
for _ in range(10):
47+
time.sleep(1)
48+
p.set_ef(15)
49+
for _ in range(1):
50+
# p.load_index(index_path)
51+
for _ in range(3):
5252
t0=time.time()
53-
labels, distances = p.knn_query(data, k=1)
53+
qdata=data[:5000*threads]
54+
labels, distances = p.knn_query(qdata, k=1)
5455
tt=time.time()-t0
5556
times.append(tt)
56-
print(f"{tt} seconds")
57-
str_out=f"mean time:{np.mean(times)}, median time:{np.median(times)}, std time {np.std(times)} {name}"
57+
recall=np.sum(labels.reshape(-1)==np.arange(len(qdata)))/len(qdata)
58+
print(f"{tt} seconds, recall= {recall}")
59+
60+
str_out=f"{np.mean(times)}, {np.median(times)}, {np.std(times)}, {construction_time}, {recall}, {name}"
5861
print(str_out)
59-
with open (f"log_{dim}_t{threads}.txt","a") as f:
62+
with open (f"log2_{dim}_t{threads}.txt","a") as f:
6063
f.write(str_out+"\n")
6164
f.flush()
6265

0 commit comments

Comments
 (0)