
Commit 4c1d080

committed
implemented custom process management instead of using pool
1 parent deaf5ab commit 4c1d080
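
For context, the commit replaces the multiprocessing.Pool / Barrier setup in engine/base_client/search.py with explicitly managed Process workers that report results through a Queue. The following is a minimal, generic sketch of the two patterns, using a toy work function and made-up names rather than the benchmark's actual classes:

import multiprocessing as mp

def work(chunk):
    # Toy stand-in for per-chunk work; the real code runs search_one per query.
    return [x * x for x in chunk]

def pool_style(chunks):
    # Pool-based fan-out (roughly what the old code did via starmap).
    with mp.get_context("spawn").Pool(processes=len(chunks)) as pool:
        return [r for part in pool.map(work, chunks) for r in part]

def worker(chunk, queue):
    # One explicitly managed worker per chunk; results go back through a Queue.
    queue.put(work(chunk))

def process_style(chunks):
    queue = mp.Queue()
    procs = [mp.Process(target=worker, args=(chunk, queue)) for chunk in chunks]
    for p in procs:
        p.start()
    results = [r for _ in procs for r in queue.get()]  # drain before joining
    for p in procs:
        p.join()
    return results

if __name__ == "__main__":
    chunks = [[1, 2], [3, 4]]
    print(pool_style(chunks), process_style(chunks))

Explicit Process objects trade Pool's convenience for direct control over per-worker setup and result collection, which is what the search.py diff below implements.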

File tree

3 files changed: +51 additions, -25 deletions


datasets/datasets.json

Lines changed: 8 additions & 0 deletions
@@ -330,6 +330,14 @@
       "type": "h5",
       "path": "laion-img-emb-512/laion-img-emb-512-1M-cosine.hdf5",
       "link": "http://benchmarks.redislabs.s3.amazonaws.com/vecsim/laion400m/laion-img-emb-512-100M-cosine.hdf5"
+    },
+    {
+      "name": "laion-img-emb-512-1M-100ktrain-cosine",
+      "vector_size": 512,
+      "distance": "cosine",
+      "type": "h5",
+      "path": "laion-img-emb-512/laion-img-emb-512-1M-100ktrain-cosine.hdf5",
+      "link": "http://benchmarks.redislabs.s3.amazonaws.com/vecsim/laion400m/laion-img-emb-512-100M-cosine.hdf5"
     }
 
 ]
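
The new entry follows the existing schema ("type": "h5", 512-dimensional vectors, cosine distance). Purely as an illustration, a file described this way could be inspected with h5py roughly as below; the local path prefix and the "train" key are assumptions, not taken from this repo's reader:

import h5py  # assumed dependency; the benchmark's own dataset reader may differ

# Hypothetical local copy of the file referenced by the new "path" field.
path = "datasets/laion-img-emb-512/laion-img-emb-512-1M-100ktrain-cosine.hdf5"

with h5py.File(path, "r") as f:
    print(list(f.keys()))   # inspect which datasets the file actually contains
    train = f["train"]      # "train" is an assumed key, common in ANN benchmark files
    print(train.shape)      # expected (n_vectors, 512) given "vector_size": 512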

engine/base_client/client.py

Lines changed: 6 additions & 3 deletions
@@ -36,8 +36,9 @@ def save_search_results(
     ):
         now = datetime.now()
         timestamp = now.strftime("%Y-%m-%d-%H-%M-%S")
+        pid = os.getpid()  # Get the current process ID
         experiments_file = (
-            f"{self.name}-{dataset_name}-search-{search_id}-{timestamp}.json"
+            f"{self.name}-{dataset_name}-search-{search_id}-{pid}-{timestamp}.json"
         )
         result_path = RESULTS_DIR / experiments_file
         with open(result_path, "w") as out:
@@ -89,7 +90,8 @@ def run_experiment(
         reader = dataset.get_reader(execution_params.get("normalize", False))
 
         if skip_if_exists:
-            glob_pattern = f"{self.name}-{dataset.config.name}-search-*-*.json"
+            pid = os.getpid()  # Get the current process ID
+            glob_pattern = f"{self.name}-{dataset.config.name}-search-{pid}-*-*.json"
             existing_results = list(RESULTS_DIR.glob(glob_pattern))
             if len(existing_results) == len(self.searchers):
                 print(
@@ -124,8 +126,9 @@ def run_experiment(
         for search_id, searcher in enumerate(self.searchers):
 
             if skip_if_exists:
+                pid = os.getpid()  # Get the current process ID
                 glob_pattern = (
-                    f"{self.name}-{dataset.config.name}-search-{search_id}-*.json"
+                    f"{self.name}-{dataset.config.name}-search-{search_id}-{pid}-*.json"
                 )
                 existing_results = list(RESULTS_DIR.glob(glob_pattern))
                 print("Pattern", glob_pattern, "Results:", existing_results)

engine/base_client/search.py

Lines changed: 37 additions & 22 deletions
@@ -1,6 +1,6 @@
 import functools
 import time
-from multiprocessing import get_context, Barrier
+from multiprocessing import get_context, Barrier, Process, Queue
 from typing import Iterable, List, Optional, Tuple
 from itertools import islice
 
@@ -75,42 +75,52 @@ def search_all(
 
         search_one = functools.partial(self.__class__._search_one, top=top)
 
+        # Initialize the start time
+        start = time.perf_counter()
+
         if parallel == 1:
-            start = time.perf_counter()
+            # Single-threaded execution
             precisions, latencies = list(
                 zip(*[search_one(query) for query in tqdm.tqdm(queries)])
             )
         else:
-            ctx = get_context(self.get_mp_start_method())
-
-            # Create a Barrier to synchronize processes
-            barrier = Barrier(parallel)
+            # Dynamically calculate chunk size
+            chunk_size = max(1, len(queries) // parallel)
+            query_chunks = list(chunked_iterable(queries, chunk_size))
 
-            def process_initializer():
-                """Initialize each process before starting the search."""
+            # Function to be executed by each worker process
+            def worker_function(chunk, result_queue):
                 self.__class__.init_client(
                     self.host,
                     distance,
                     self.connection_params,
                     self.search_params,
                 )
                 self.setup_search()
-                barrier.wait()  # Wait for all processes to be ready
+                results = process_chunk(chunk, search_one)
+                result_queue.put(results)
 
-            # Dynamically calculate chunk size
-            chunk_size = max(1, len(queries) // parallel)
-            query_chunks = list(chunked_iterable(queries, chunk_size))
+            # Create a queue to collect results
+            result_queue = Queue()
 
-            with ctx.Pool(
-                processes=parallel,
-                initializer=process_initializer,
-            ) as pool:
-                start = time.perf_counter()
-                results = pool.starmap(
-                    process_chunk,
-                    [(chunk, search_one) for chunk in query_chunks],
-                )
-                precisions, latencies = zip(*[result for chunk in results for result in chunk])
+            # Create and start worker processes
+            processes = []
+            for chunk in query_chunks:
+                process = Process(target=worker_function, args=(chunk, result_queue))
+                processes.append(process)
+                process.start()
+
+            # Collect results from all worker processes
+            results = []
+            for _ in processes:
+                results.extend(result_queue.get())
+
+            # Wait for all worker processes to finish
+            for process in processes:
+                process.join()
+
+            # Extract precisions and latencies
+            precisions, latencies = zip(*results)
 
         total_time = time.perf_counter() - start
 
@@ -151,3 +161,8 @@ def chunked_iterable(iterable, size):
 def process_chunk(chunk, search_one):
     """Process a chunk of queries using the search_one function."""
     return [search_one(query) for query in chunk]
+
+
+def process_chunk_wrapper(chunk, search_one):
+    """Wrapper to process a chunk of queries."""
+    return process_chunk(chunk, search_one)
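
One detail of the new flow worth noting: the parent drains the Queue (one get() per worker) before calling join(). Per the multiprocessing documentation, a process that has put items on a queue may not exit until those items are flushed to the underlying pipe, so joining before consuming can deadlock; collecting results first avoids that. A stripped-down sketch of the same collect-then-join ordering, with a toy worker standing in for process_chunk/search_one:

from multiprocessing import Process, Queue

def worker(chunk, result_queue):
    # Toy stand-in for process_chunk(chunk, search_one): one (precision, latency) per query.
    result_queue.put([(1.0, 0.001) for _ in chunk])

if __name__ == "__main__":
    chunks = [range(3), range(4)]
    result_queue = Queue()
    processes = [Process(target=worker, args=(c, result_queue)) for c in chunks]
    for p in processes:
        p.start()

    results = []
    for _ in processes:              # one get() per worker: drain first...
        results.extend(result_queue.get())
    for p in processes:              # ...then join, avoiding the queue/join deadlock
        p.join()

    precisions, latencies = zip(*results)
    print(len(precisions), len(latencies))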
