diff --git a/.gitignore b/.gitignore
index c4045e98..d2cde965 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,8 @@
-hnswlib.egg-info/
-build/
-dist/
-tmp/
-python_bindings/tests/__pycache__/
-*.pyd
-hnswlib.cpython*.so
+hnswlib.egg-info/
+build/
+dist/
+tmp/
+python_bindings/tests/__pycache__/
+*.pyd
+hnswlib.cpython*.so
+var/
diff --git a/.travis.yml b/.travis.yml
index 893441e9..2c3c9960 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -30,9 +30,8 @@ jobs:
 
 install:
   - |
-    pip install -r requirements.txt
-    python setup.py install
+    python -m pip install .
 
 script:
   - |
-    python setup.py test
+    python -m unittest discover --start-directory python_bindings/tests --pattern "*_test*.py"
diff --git a/Makefile b/Makefile
index 792b246e..b5e8fda9 100644
--- a/Makefile
+++ b/Makefile
@@ -3,12 +3,13 @@ pypi: dist
 
 dist:
 	-rm dist/*
-	python3 setup.py sdist
+	pip install build
+	python3 -m build --sdist
 
 test:
-	python3 setup.py test
+	python3 -m unittest discover --start-directory python_bindings/tests --pattern "*_test*.py"
 
 clean:
 	rm -rf *.egg-info build dist tmp var tests/__pycache__ hnswlib.cpython*.so
 
-.PHONY: dist
\ No newline at end of file
+.PHONY: dist
diff --git a/README.md b/README.md
index 89cce5ce..90105f0d 100644
--- a/README.md
+++ b/README.md
@@ -213,8 +213,9 @@ print("Recall for two batches:", np.mean(labels.reshape(-1) == np.arange(len(dat
 You can install from sources:
 ```bash
 apt-get install -y python-setuptools python-pip
-pip3 install pybind11 numpy setuptools
-python3 setup.py install
+git clone https://github.com/nmslib/hnswlib.git
+cd hnswlib
+pip install .
 ```
 
 or you can install via pip:
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..e00b3fb8
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,9 @@
+[build-system]
+requires = [
+    "setuptools>=42",
+    "wheel",
+    "numpy>=1.10.0",
+    "pybind11>=2.0",
+]
+
+build-backend = "setuptools.build_meta"
diff --git a/python_bindings/tests/bindings_test.py b/python_bindings/tests/bindings_test.py
index 009b2164..d718bc3b 100644
--- a/python_bindings/tests/bindings_test.py
+++ b/python_bindings/tests/bindings_test.py
@@ -1,11 +1,13 @@
 import os
 import unittest
 
+import numpy as np
+
+import hnswlib
+
 
 class RandomSelfTestCase(unittest.TestCase):
     def testRandomSelf(self):
-        import hnswlib
-        import numpy as np
 
         dim = 16
         num_elements = 10000
@@ -41,7 +43,7 @@ def testRandomSelf(self):
 
         # Query the elements for themselves and measure recall:
         labels, distances = p.knn_query(data1, k=1)
-        self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data1))),1.0,3)
+        self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data1))), 1.0, 3)
 
         # Serializing and deleting the index:
         index_path = 'first_half.bin'
@@ -61,10 +63,6 @@ def testRandomSelf(self):
         # Query the elements for themselves and measure recall:
         labels, distances = p.knn_query(data, k=1)
 
-        self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data))),1.0,3)
+        self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data))), 1.0, 3)
         
         os.remove(index_path)
-
-
-if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
diff --git a/python_bindings/tests/bindings_test_getdata.py b/python_bindings/tests/bindings_test_getdata.py
index 3e234518..8655d7f8 100644
--- a/python_bindings/tests/bindings_test_getdata.py
+++ b/python_bindings/tests/bindings_test_getdata.py
@@ -1,11 +1,13 @@
 import unittest
 
+import numpy as np
+
+import hnswlib
+
 
 class RandomSelfTestCase(unittest.TestCase):
     def testGettingItems(self):
         print("\n**** Getting the data by label test ****\n")
-        import hnswlib
-        import numpy as np
 
         dim = 16
         num_elements = 10000
@@ -42,6 +44,3 @@ def testGettingItems(self):
         # After adding them, all labels should be retrievable
         returned_items = p.get_items(labels)
         self.assertSequenceEqual(data.tolist(), returned_items)
-
-if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
diff --git a/python_bindings/tests/bindings_test_labels.py b/python_bindings/tests/bindings_test_labels.py
index e44b0988..5c13e198 100644
--- a/python_bindings/tests/bindings_test_labels.py
+++ b/python_bindings/tests/bindings_test_labels.py
@@ -1,131 +1,127 @@
 import os
 import unittest
 
+import numpy as np
 
-class RandomSelfTestCase(unittest.TestCase):
-    def testRandomSelf(self):
-      for idx in range(16):
-        print("\n**** Index save-load test ****\n")
-        import hnswlib
-        import numpy as np
-        
-        np.random.seed(idx)
-        dim = 16
-        num_elements = 10000
-
-        # Generating sample data
-        data = np.float32(np.random.random((num_elements, dim)))
-
-        # Declaring index
-        p = hnswlib.Index(space='l2', dim=dim)  # possible options are l2, cosine or ip
-
-        # Initing index
-        # max_elements - the maximum number of elements, should be known beforehand
-        #     (probably will be made optional in the future)
-        #
-        # ef_construction - controls index search speed/build speed tradeoff
-        # M - is tightly connected with internal dimensionality of the data
-        #     stronlgy affects the memory consumption
-
-        p.init_index(max_elements = num_elements, ef_construction = 100, M = 16)
-
-        # Controlling the recall by setting ef:
-        # higher ef leads to better accuracy, but slower search
-        p.set_ef(100)
-
-        p.set_num_threads(4)  # by default using all available cores
-
-        # We split the data in two batches:
-        data1 = data[:num_elements // 2]
-        data2 = data[num_elements // 2:]
-
-        print("Adding first batch of %d elements" % (len(data1)))
-        p.add_items(data1)
-
-        # Query the elements for themselves and measure recall:
-        labels, distances = p.knn_query(data1, k=1)
-
-        items=p.get_items(labels)
-
-        # Check the recall:
-        self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data1))),1.0,3)
-
-        # Check that the returned element data is correct:
-        diff_with_gt_labels=np.mean(np.abs(data1-items))
-        self.assertAlmostEqual(diff_with_gt_labels, 0, delta = 1e-4)
-
-        # Serializing and deleting the index.
-        # We need the part to check that serialization is working properly.
-
-        index_path = 'first_half.bin'
-        print("Saving index to '%s'" % index_path)
-        p.save_index(index_path)
-        print("Saved. Deleting...")
-        del p
-        print("Deleted")
-
-        print("\n**** Mark delete test ****\n")
-        # Reiniting, loading the index
-        print("Reiniting")
-        p = hnswlib.Index(space='l2', dim=dim)
-
-        print("\nLoading index from '%s'\n" % index_path)
-        p.load_index(index_path)
-        p.set_ef(100)
-
-        print("Adding the second batch of %d elements" % (len(data2)))
-        p.add_items(data2)
-
-        # Query the elements for themselves and measure recall:
-        labels, distances = p.knn_query(data, k=1)
-        items=p.get_items(labels)
-
-        # Check the recall:
-        self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data))),1.0,3)
-
-        # Check that the returned element data is correct:
-        diff_with_gt_labels=np.mean(np.abs(data-items))
-        self.assertAlmostEqual(diff_with_gt_labels, 0, delta = 1e-4) # deleting index.
-
-        # Checking that all labels are returned correctly:
-        sorted_labels=sorted(p.get_ids_list())
-        self.assertEqual(np.sum(~np.asarray(sorted_labels)==np.asarray(range(num_elements))),0)
-
-        # Delete data1
-        labels1, _ = p.knn_query(data1, k=1)
-
-        for l in labels1:
-            p.mark_deleted(l[0])
-        labels2, _ = p.knn_query(data2, k=1)
-        items=p.get_items(labels2)
-        diff_with_gt_labels=np.mean(np.abs(data2-items))
-        self.assertAlmostEqual(diff_with_gt_labels, 0, delta = 1e-3) # console
-
-
-        labels1_after, _ = p.knn_query(data1, k=1)
-        for la in labels1_after:
-            for lb in labels1:
-                if la[0] == lb[0]:
-                    self.assertTrue(False)
-        print("All the data in data1 are removed")
+import hnswlib
 
-        # checking saving/loading index with elements marked as deleted
-        del_index_path = "with_deleted.bin"
-        p.save_index(del_index_path)
-        p = hnswlib.Index(space='l2', dim=dim)
-        p.load_index(del_index_path)
-        p.set_ef(100)
 
-        labels1_after, _ = p.knn_query(data1, k=1)
-        for la in labels1_after:
-            for lb in labels1:
-                if la[0] == lb[0]:
-                    self.assertTrue(False)
-      
-      os.remove(index_path)
-      os.remove(del_index_path)
+class RandomSelfTestCase(unittest.TestCase):
+    def testRandomSelf(self):
+        for idx in range(16):
+            print("\n**** Index save-load test ****\n")
 
+            np.random.seed(idx)
+            dim = 16
+            num_elements = 10000
 
+            # Generating sample data
+            data = np.float32(np.random.random((num_elements, dim)))
 
-if __name__ == "__main__":
-    unittest.main()
+            # Declaring index
+            p = hnswlib.Index(space='l2', dim=dim)  # possible options are l2, cosine or ip
+
+            # Initing index
+            # max_elements - the maximum number of elements, should be known beforehand
+            #     (probably will be made optional in the future)
+            #
+            # ef_construction - controls index search speed/build speed tradeoff
+            # M - is tightly connected with internal dimensionality of the data
+            #     strongly affects the memory consumption
+
+            p.init_index(max_elements=num_elements, ef_construction=100, M=16)
+
+            # Controlling the recall by setting ef:
+            # higher ef leads to better accuracy, but slower search
+            p.set_ef(100)
+
+            p.set_num_threads(4)  # by default using all available cores
+
+            # We split the data in two batches:
+            data1 = data[:num_elements // 2]
+            data2 = data[num_elements // 2:]
+
+            print("Adding first batch of %d elements" % (len(data1)))
+            p.add_items(data1)
+
+            # Query the elements for themselves and measure recall:
+            labels, distances = p.knn_query(data1, k=1)
+
+            items=p.get_items(labels)
+
+            # Check the recall:
+            self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data1))), 1.0, 3)
+
+            # Check that the returned element data is correct:
+            diff_with_gt_labels=np.mean(np.abs(data1-items))
+            self.assertAlmostEqual(diff_with_gt_labels, 0, delta=1e-4)
+
+            # Serializing and deleting the index.
+            # We need this part to check that serialization is working properly.
+
+            index_path = 'first_half.bin'
+            print("Saving index to '%s'" % index_path)
+            p.save_index(index_path)
+            print("Saved. Deleting...")
+            del p
+            print("Deleted")
+
+            print("\n**** Mark delete test ****\n")
+            # Reiniting, loading the index
+            print("Reiniting")
+            p = hnswlib.Index(space='l2', dim=dim)
+
+            print("\nLoading index from '%s'\n" % index_path)
+            p.load_index(index_path)
+            p.set_ef(100)
+
+            print("Adding the second batch of %d elements" % (len(data2)))
+            p.add_items(data2)
+
+            # Query the elements for themselves and measure recall:
+            labels, distances = p.knn_query(data, k=1)
+            items=p.get_items(labels)
+
+            # Check the recall:
+            self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data))), 1.0, 3)
+
+            # Check that the returned element data is correct:
+            diff_with_gt_labels=np.mean(np.abs(data-items))
+            self.assertAlmostEqual(diff_with_gt_labels, 0, delta=1e-4) # deleting index.
+
+            # All labels must be returned: count mismatches with "!=" ("~a == b" parses as "(~a) == b"):
+            sorted_labels = sorted(p.get_ids_list())
+            self.assertEqual(np.sum(np.asarray(sorted_labels) != np.arange(num_elements)), 0)
+
+            # Delete data1
+            labels1, _ = p.knn_query(data1, k=1)
+
+            for l in labels1:
+                p.mark_deleted(l[0])
+            labels2, _ = p.knn_query(data2, k=1)
+            items=p.get_items(labels2)
+            diff_with_gt_labels = np.mean(np.abs(data2-items))
+            self.assertAlmostEqual(diff_with_gt_labels, 0, delta=1e-3) # console
+
+            labels1_after, _ = p.knn_query(data1, k=1)
+            for la in labels1_after:
+                for lb in labels1:
+                    if la[0] == lb[0]:
+                        self.assertTrue(False)
+            print("All the data in data1 are removed")
+
+            # checking saving/loading index with elements marked as deleted
+            del_index_path = "with_deleted.bin"
+            p.save_index(del_index_path)
+            p = hnswlib.Index(space='l2', dim=dim)
+            p.load_index(del_index_path)
+            p.set_ef(100)
+
+            labels1_after, _ = p.knn_query(data1, k=1)
+            for la in labels1_after:
+                for lb in labels1:
+                    if la[0] == lb[0]:
+                        self.assertTrue(False)
+
+        os.remove(index_path)
+        os.remove(del_index_path)
diff --git a/python_bindings/tests/bindings_test_pickle.py b/python_bindings/tests/bindings_test_pickle.py
index 6c3a826a..3a42df2e 100644
--- a/python_bindings/tests/bindings_test_pickle.py
+++ b/python_bindings/tests/bindings_test_pickle.py
@@ -1,28 +1,30 @@
+import pickle
 import unittest
 
 import numpy as np
+
 import hnswlib
-import pickle
 
 
 def get_dist(metric, pt1, pt2):
     if metric == 'l2':
         return np.sum((pt1-pt2)**2)
     elif metric == 'ip':
-        return 1. - np.sum(np.multiply(pt1,pt2))
+        return 1. - np.sum(np.multiply(pt1, pt2))
     elif metric == 'cosine':
-        return 1. - np.sum(np.multiply(pt1,pt2)) / (np.sum(pt1**2) * np.sum(pt2**2))**.5
+        return 1. - np.sum(np.multiply(pt1, pt2)) / (np.sum(pt1**2) * np.sum(pt2**2))**.5
+
 
 def brute_force_distances(metric, items, query_items, k):
-    dists=np.zeros((query_items.shape[0], items.shape[0]))
+    dists = np.zeros((query_items.shape[0], items.shape[0]))
     for ii in range(items.shape[0]):
         for jj in range(query_items.shape[0]):
-            dists[jj,ii]=get_dist(metric, items[ii, :], query_items[jj, :])
+            dists[jj,ii] = get_dist(metric, items[ii, :], query_items[jj, :])
 
     labels = np.argsort(dists, axis=1) # equivalent, but faster: np.argpartition(dists, range(k), axis=1)
     dists = np.sort(dists, axis=1)     # equivalent, but faster: np.partition(dists, range(k), axis=1)
 
-    return labels[:,:k], dists[:,:k]
+    return labels[:, :k], dists[:, :k]
 
 
 def check_ann_results(self, metric, items, query_items, k, ann_l, ann_d, err_thresh=0, total_thresh=0, dists_thresh=0):
@@ -36,14 +38,15 @@ def check_ann_results(self, metric, items, query_items, k, ann_l, ann_d, err_thr
         if err > err_thresh:
             err_total += 1
 
-    self.assertLessEqual( err_total, total_thresh, f"Error: knn_query returned incorrect labels for {err_total} items (k={k})")
+    self.assertLessEqual(err_total, total_thresh, f"Error: knn_query returned incorrect labels for {err_total} items (k={k})")
 
-    wrong_dists=np.sum(((brute_d- ann_d)**2.)>1e-3)
+    wrong_dists = np.sum(((brute_d - ann_d)**2.) > 1e-3)
     if wrong_dists > 0:
-        dists_count=brute_d.shape[0]*brute_d.shape[1]
+        dists_count = brute_d.shape[0]*brute_d.shape[1]
         print(f"Warning: {wrong_dists} ann distance values are different from brute-force values (total # of values={dists_count}, dists_thresh={dists_thresh})")
 
-    self.assertLessEqual( wrong_dists, dists_thresh, msg=f"Error: {wrong_dists} ann distance values are different from brute-force values")
+    self.assertLessEqual(wrong_dists, dists_thresh, msg=f"Error: {wrong_dists} ann distance values are different from brute-force values")
+
 
 def test_space_main(self, space, dim):
 
@@ -55,16 +58,16 @@ def test_space_main(self, space, dim):
     p = hnswlib.Index(space=space, dim=dim)  # possible options are l2, cosine or ip
     print(f"Running pickle tests for {p}")
 
-    p.num_threads=self.num_threads  # by default using all available cores
+    p.num_threads = self.num_threads  # by default using all available cores
 
-    p0=pickle.loads(pickle.dumps(p)) ### pickle un-initialized Index
-    p.init_index(max_elements = self.num_elements, ef_construction = self.ef_construction, M = self.M)
-    p0.init_index(max_elements = self.num_elements, ef_construction = self.ef_construction, M = self.M)
+    p0 = pickle.loads(pickle.dumps(p)) ### pickle un-initialized Index
+    p.init_index(max_elements=self.num_elements, ef_construction=self.ef_construction, M=self.M)
+    p0.init_index(max_elements=self.num_elements, ef_construction=self.ef_construction, M=self.M)
 
-    p.ef=self.ef
-    p0.ef=self.ef
+    p.ef = self.ef
+    p0.ef = self.ef
 
-    p1=pickle.loads(pickle.dumps(p)) ### pickle Index before adding items
+    p1 = pickle.loads(pickle.dumps(p)) ### pickle Index before adding items
 
     ### add items to ann index p,p0,p1
     p.add_items(data)
@@ -78,7 +81,7 @@ def test_space_main(self, space, dim):
     self.assertTrue(np.allclose(p1.get_items(), p2.get_items()), "items for p1 and p2 must be same")
 
     ### Test if returned distances are same
-    l, d   = p.knn_query(test_data, k=self.k)
+    l, d = p.knn_query(test_data, k=self.k)
     l0, d0 = p0.knn_query(test_data, k=self.k)
     l1, d1 = p1.knn_query(test_data, k=self.k)
     l2, d2 = p2.knn_query(test_data, k=self.k)
@@ -90,9 +93,9 @@ def test_space_main(self, space, dim):
     ### check if ann results match brute-force search
     ###   allow for 2 labels to be missing from ann results
     check_ann_results(self, space, data, test_data, self.k, l, d,
-                           err_thresh = self.label_err_thresh,
-                           total_thresh = self.item_err_thresh,
-                           dists_thresh = self.dists_err_thresh)
+                           err_thresh=self.label_err_thresh,
+                           total_thresh=self.item_err_thresh,
+                           dists_thresh=self.dists_err_thresh)
 
     check_ann_results(self, space, data, test_data, self.k, l2, d2,
                            err_thresh=self.label_err_thresh,
@@ -118,7 +121,6 @@ def test_space_main(self, space, dim):
     self.assertEqual(p2.ef_construction, self.ef_construction, "incorrect value of p2.ef_construction")
 
 
-
 class PickleUnitTests(unittest.TestCase):
 
     def setUp(self):
@@ -133,10 +135,10 @@ def setUp(self):
         self.num_threads = 4
         self.k = 25
 
-        self.label_err_thresh=5  ### max number of missing labels allowed per test item
-        self.item_err_thresh=5   ### max number of items allowed with incorrect labels
+        self.label_err_thresh = 5  ### max number of missing labels allowed per test item
+        self.item_err_thresh = 5   ### max number of items allowed with incorrect labels
 
-        self.dists_err_thresh=50 ### for two matrices, d1 and d2, dists_err_thresh controls max
+        self.dists_err_thresh = 50 ### for two matrices, d1 and d2, dists_err_thresh controls max
                                  ### number of value pairs that are allowed to be different in d1 and d2
                                  ### i.e., number of values that are (d1-d2)**2>1e-3
 
@@ -148,6 +150,3 @@ def test_l2_space(self):
 
     def test_cosine_space(self):
         test_space_main(self, 'cosine', 512)
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/python_bindings/tests/bindings_test_resize.py b/python_bindings/tests/bindings_test_resize.py
index 9411af64..3c4e3e4f 100644
--- a/python_bindings/tests/bindings_test_resize.py
+++ b/python_bindings/tests/bindings_test_resize.py
@@ -1,12 +1,15 @@
 import unittest
 
+import numpy as np
+
+import hnswlib
+
 
 class RandomSelfTestCase(unittest.TestCase):
     def testRandomSelf(self):
       for idx in range(16):
         print("\n**** Index resize test ****\n")
-        import hnswlib
-        import numpy as np
+
         np.random.seed(idx)
         dim = 16
         num_elements = 10000
@@ -25,7 +28,7 @@ def testRandomSelf(self):
         # M - is tightly connected with internal dimensionality of the data
         #     stronlgy affects the memory consumption
 
-        p.init_index(max_elements = num_elements//2, ef_construction = 100, M = 16)
+        p.init_index(max_elements=num_elements//2, ef_construction=100, M=16)
 
         # Controlling the recall by setting ef:
         # higher ef leads to better accuracy, but slower search
@@ -43,20 +46,18 @@ def testRandomSelf(self):
         # Query the elements for themselves and measure recall:
         labels, distances = p.knn_query(data1, k=1)
 
-        items=p.get_items(list(range(len(data1))))
+        items = p.get_items(list(range(len(data1))))
 
         # Check the recall:
-        self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data1))),1.0,3)
+        self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data1))), 1.0, 3)
 
         # Check that the returned element data is correct:
-        diff_with_gt_labels=np.max(np.abs(data1-items))
-        self.assertAlmostEqual(diff_with_gt_labels, 0, delta = 1e-4)
+        diff_with_gt_labels = np.max(np.abs(data1-items))
+        self.assertAlmostEqual(diff_with_gt_labels, 0, delta=1e-4)
 
         print("Resizing the index")
         p.resize_index(num_elements)
 
-
-
         print("Adding the second batch of %d elements" % (len(data2)))
         p.add_items(data2)
 
@@ -65,18 +66,12 @@ def testRandomSelf(self):
         items=p.get_items(list(range(num_elements)))
 
         # Check the recall:
-        self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data))),1.0,3)
+        self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data))), 1.0, 3)
 
         # Check that the returned element data is correct:
         diff_with_gt_labels=np.max(np.abs(data-items))
-        self.assertAlmostEqual(diff_with_gt_labels, 0, delta = 1e-4)
+        self.assertAlmostEqual(diff_with_gt_labels, 0, delta=1e-4)
 
         # Checking that all labels are returned correcly:
         sorted_labels=sorted(p.get_ids_list())
-        self.assertEqual(np.sum(~np.asarray(sorted_labels)==np.asarray(range(num_elements))),0)
-
-
-
-
-if __name__ == "__main__":
-    unittest.main()
+        self.assertEqual(np.sum(np.asarray(sorted_labels) != np.arange(num_elements)), 0)
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 81fbf192..00000000
--- a/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-numpy>=1.10.0
-pybind11>=2.0
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 002f3893..929bc211 100644
--- a/setup.py
+++ b/setup.py
@@ -1,19 +1,28 @@
 import os
-from setuptools import setup, Extension
-from setuptools.command.build_ext import build_ext
 import sys
+
+import numpy as np
+import pybind11
 import setuptools
+from setuptools import Extension, setup
+from setuptools.command.build_ext import build_ext
 
 __version__ = '0.4.0'
 
+
+include_dirs = [
+    pybind11.get_include(),
+    np.get_include(),
+]
+
 # compatibility when run in python_bindings
 bindings_dir = 'python_bindings'
 if bindings_dir in os.path.basename(os.getcwd()):
     source_files = ['./bindings.cpp']
-    include_dirs = ['../hnswlib/']
+    include_dirs.extend(['../hnswlib/'])
 else:
     source_files = ['./python_bindings/bindings.cpp']
-    include_dirs = ['./hnswlib/']
+    include_dirs.extend(['./hnswlib/'])
 
 
 libraries = []
@@ -90,21 +99,9 @@ def build_extensions(self):
         elif ct == 'msvc':
             opts.append('/DVERSION_INFO=\\"%s\\"' % self.distribution.get_version())
 
-        # extend include dirs here (don't assume numpy/pybind11 are installed when first run, since
-        # pip could have installed them as part of executing this script
-        import pybind11
-        import numpy as np
         for ext in self.extensions:
             ext.extra_compile_args.extend(opts)
             ext.extra_link_args.extend(self.link_opts.get(ct, []))
-            ext.include_dirs.extend([
-                # Path to pybind11 headers
-                pybind11.get_include(),
-                pybind11.get_include(True),
-
-                # Path to numpy headers
-                np.get_include()
-            ])
 
         build_ext.build_extensions(self)
 
@@ -117,8 +114,7 @@ def build_extensions(self):
     url='https://github.com/yurymalkov/hnsw',
     long_description="""hnsw""",
     ext_modules=ext_modules,
-    install_requires=['pybind11>=2.0', 'numpy'],
+    install_requires=['numpy'],
     cmdclass={'build_ext': BuildExt},
-    test_suite="python_bindings.tests",
     zip_safe=False,
 )