|
# Benchmark chunk: build an HNSW index over random vectors, then time batched
# k-NN self-queries and append a summary row to a per-(dim, threads) log file.
# Relies on names defined earlier in the file: args (parsed CLI), np (numpy),
# hnswlib, time.

dim = int(args.d)       # vector dimensionality
name = args.n           # label recorded in the output row
threads = int(args.t)   # thread count used for the query phase
num_elements = 400000   # index size (fixed, independent of dim)

# Generate reproducible sample data.
np.random.seed(1)
data = np.float32(np.random.random((num_elements, dim)))

# Declaring index; possible space options are l2, cosine or ip.
p = hnswlib.Index(space='l2', dim=dim)
p.init_index(max_elements=num_elements, ef_construction=60, M=16)

# Controlling the recall by setting ef:
# higher ef leads to better accuracy, but slower search.
p.set_ef(10)

# Build with many threads so construction time is not the bottleneck.
p.set_num_threads(64)
t0 = time.time()
p.add_items(data)
construction_time = time.time() - t0

# Query phase: restrict to the requested thread count.
p.set_num_threads(threads)
times = []
time.sleep(1)  # brief settle before timing
p.set_ef(15)

# The query batch is loop-invariant; slice it ONCE, outside the timed region.
# (Previously the slice was taken between t0 and tt, polluting the measurement.)
qdata = data[:5000 * threads]
recall = 0.0  # value from the last repetition is what the summary row reports
for _ in range(3):
    t0 = time.time()
    labels, distances = p.knn_query(qdata, k=1)
    tt = time.time() - t0
    times.append(tt)
    # Exact self-recall: query i should return element i, since queries are
    # drawn directly from the indexed data.
    recall = np.sum(labels.reshape(-1) == np.arange(len(qdata))) / len(qdata)
    print(f"{tt} seconds, recall= {recall}")

# Summary row: mean/median/std query time, build time, last-run recall, label.
str_out = f"{np.mean(times)}, {np.median(times)}, {np.std(times)}, {construction_time}, {recall}, {name}"
print(str_out)
with open(f"log2_{dim}_t{threads}.txt", "a") as f:
    f.write(str_out + "\n")
    f.flush()
|
0 commit comments