Skip to content

Commit e7a3034

Browse files
Remove faiss and cuvs dependencies
1 parent d721170 commit e7a3034

File tree

4 files changed

+74
-128
lines changed

4 files changed

+74
-128
lines changed

pyproject.toml

Lines changed: 4 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -43,19 +43,13 @@ dependencies = [
4343
]
4444

4545
[project.optional-dependencies]
46-
faiss = [
47-
"faiss-cpu>=1.7.0",
48-
"faiss-gpu>=1.7.0"
49-
]
5046

51-
rapids11 = [
52-
"cupy-cuda11x",
53-
"cuvs-cu11==24.4.*",
47+
cuda11 = [
48+
"cupy-cuda11x"
5449
]
5550

56-
rapids12 = [
57-
"cupy-cuda12x",
58-
"cuvs-cu12==24.4.*",
51+
cuda12 = [
52+
"cupy-cuda12x"
5953
]
6054

6155
multiprocessing = ["multiprocessing"]

src/segger/data/utils.py

Lines changed: 70 additions & 116 deletions
Original file line number | Diff line number | Diff line change
@@ -32,13 +32,12 @@ def try_import(module_name):
3232
# Attempt to import specific modules with try_import function
3333
try_import("multiprocessing")
3434
try_import("joblib")
35-
try_import("faiss")
36-
try_import("cuvs")
37-
try:
38-
import cupy as cp
39-
from cuvs.neighbors import cagra
40-
except ImportError:
41-
print(f"Warning: cupy and/or cuvs are not installed. Please install them to use this functionality.")
35+
# try_import("cuvs")
36+
# try:
37+
# import cupy as cp
38+
# from cuvs.neighbors import cagra
39+
# except ImportError:
40+
# print(f"Warning: cupy and/or cuvs are not installed. Please install them to use this functionality.")
4241

4342
import torch.utils.dlpack as dlpack
4443
from datetime import timedelta
@@ -270,33 +269,28 @@ def get_edge_index(
270269
k: int = 5,
271270
dist: int = 10,
272271
method: str = "kd_tree",
273-
gpu: bool = False,
274272
workers: int = 1,
275273
) -> torch.Tensor:
276274
"""
277-
Computes edge indices using various methods (KD-Tree, FAISS, RAPIDS::cuvs+cupy (cuda)).
275+
Computes edge indices using KD-Tree.
278276
279277
Parameters:
280278
coords_1 (np.ndarray): First set of coordinates.
281279
coords_2 (np.ndarray): Second set of coordinates.
282280
k (int, optional): Number of nearest neighbors.
283281
dist (int, optional): Distance threshold.
284-
method (str, optional): The method to use ('kd_tree', 'faiss', 'cuda').
285-
gpu (bool, optional): Whether to use GPU acceleration (applicable for FAISS).
282+
method (str, optional): The method to use. Only 'kd_tree' is supported now.
286283
287284
Returns:
288285
torch.Tensor: Edge indices.
289286
"""
290287
if method == "kd_tree":
291288
return get_edge_index_kdtree(coords_1, coords_2, k=k, dist=dist, workers=workers)
292-
elif method == "faiss":
293-
return get_edge_index_faiss(coords_1, coords_2, k=k, dist=dist, gpu=gpu)
294-
elif method == "cuda":
295-
# pass
296-
return get_edge_index_cuda(coords_1, coords_2, k=k, dist=dist)
289+
# elif method == "cuda":
290+
# return get_edge_index_cuda(coords_1, coords_2, k=k, dist=dist)
297291
else:
298-
msg = f"Unknown method {method}. Valid methods include: 'kd_tree', " "'faiss', and 'cuda'."
299-
raise ValueError()
292+
msg = f"Unknown method {method}. The only supported method is 'kd_tree' now."
293+
raise ValueError(msg)
300294

301295

302296
def get_edge_index_kdtree(
@@ -332,104 +326,64 @@ def get_edge_index_kdtree(
332326
return edge_index
333327

334328

335-
def get_edge_index_faiss(
336-
coords_1: np.ndarray, coords_2: np.ndarray, k: int = 5, dist: int = 10, gpu: bool = False
337-
) -> torch.Tensor:
338-
"""
339-
Computes edge indices using FAISS.
340-
341-
Parameters:
342-
coords_1 (np.ndarray): First set of coordinates.
343-
coords_2 (np.ndarray): Second set of coordinates.
344-
k (int, optional): Number of nearest neighbors.
345-
dist (int, optional): Distance threshold.
346-
gpu (bool, optional): Whether to use GPU acceleration.
347-
348-
Returns:
349-
torch.Tensor: Edge indices.
350-
"""
351-
coords_1 = np.ascontiguousarray(coords_1, dtype=np.float32)
352-
coords_2 = np.ascontiguousarray(coords_2, dtype=np.float32)
353-
d = coords_1.shape[1]
354-
if gpu:
355-
res = faiss.StandardGpuResources()
356-
index = faiss.GpuIndexFlatL2(res, d)
357-
else:
358-
index = faiss.IndexFlatL2(d)
359-
360-
index.add(coords_1.astype("float32"))
361-
D, I = index.search(coords_2.astype("float32"), k)
362-
363-
valid_mask = D < dist**2
364-
edges = []
365-
366-
for idx, valid in enumerate(valid_mask):
367-
valid_indices = I[idx][valid]
368-
if valid_indices.size > 0:
369-
edges.append(np.vstack((np.full(valid_indices.shape, idx), valid_indices)).T)
370-
371-
edge_index = torch.tensor(np.vstack(edges), dtype=torch.long).contiguous()
372-
return edge_index
373-
374-
375-
def get_edge_index_cuda(
376-
coords_1: torch.Tensor,
377-
coords_2: torch.Tensor,
378-
k: int = 10,
379-
dist: float = 10.0,
380-
metric: str = "sqeuclidean",
381-
nn_descent_niter: int = 100,
382-
) -> torch.Tensor:
383-
"""
384-
Computes edge indices using RAPIDS cuVS with cagra for vector similarity search,
385-
with input coordinates as PyTorch tensors on CUDA, using DLPack for conversion.
386-
387-
Parameters:
388-
coords_1 (torch.Tensor): First set of coordinates (query vectors) on CUDA.
389-
coords_2 (torch.Tensor): Second set of coordinates (index vectors) on CUDA.
390-
k (int, optional): Number of nearest neighbors.
391-
dist (float, optional): Distance threshold.
392-
393-
Returns:
394-
torch.Tensor: Edge indices as a PyTorch tensor on CUDA.
395-
"""
396-
397-
def cupy_to_torch(cupy_array):
398-
return torch.from_dlpack((cupy_array.toDlpack()))
399-
400-
# gg
401-
def torch_to_cupy(tensor):
402-
return cp.fromDlpack(dlpack.to_dlpack(tensor))
403-
404-
# Convert PyTorch tensors (CUDA) to CuPy arrays using DLPack
405-
cp_coords_1 = torch_to_cupy(coords_1).astype(cp.float32)
406-
cp_coords_2 = torch_to_cupy(coords_2).astype(cp.float32)
407-
# Define the distance threshold in CuPy
408-
cp_dist = cp.float32(dist)
409-
# IndexParams and SearchParams for cagra
410-
# compression_params = cagra.CompressionParams(pq_bits=pq_bits)
411-
index_params = cagra.IndexParams(
412-
metric=metric, nn_descent_niter=nn_descent_niter
413-
) # , compression=compression_params)
414-
search_params = cagra.SearchParams()
415-
# Build index using CuPy coords
416-
try:
417-
index = cagra.build(index_params, cp_coords_1)
418-
except AttributeError:
419-
index = cagra.build_index(index_params, cp_coords_1)
420-
# Perform search to get distances and indices (still in CuPy)
421-
D, I = cagra.search(search_params, index, cp_coords_2, k)
422-
# Boolean mask for filtering distances below the squared threshold (all in CuPy)
423-
valid_mask = cp.asarray(D < cp_dist**2)
424-
# Vectorized operations for row and valid indices (all in CuPy)
425-
repeats = valid_mask.sum(axis=1).tolist()
426-
row_indices = cp.repeat(cp.arange(len(cp_coords_2)), repeats)
427-
valid_indices = cp.asarray(I)[cp.where(valid_mask)]
428-
# Stack row indices with valid indices to form edges
429-
edges = cp.vstack((row_indices, valid_indices)).T
430-
# Convert the result back to a PyTorch tensor using DLPack
431-
edge_index = cupy_to_torch(edges).long().contiguous()
432-
return edge_index
329+
# def get_edge_index_cuda(
330+
# coords_1: torch.Tensor,
331+
# coords_2: torch.Tensor,
332+
# k: int = 10,
333+
# dist: float = 10.0,
334+
# metric: str = "sqeuclidean",
335+
# nn_descent_niter: int = 100,
336+
# ) -> torch.Tensor:
337+
# """
338+
# Computes edge indices using RAPIDS cuVS with cagra for vector similarity search,
339+
# with input coordinates as PyTorch tensors on CUDA, using DLPack for conversion.
340+
341+
# Parameters:
342+
# coords_1 (torch.Tensor): First set of coordinates (query vectors) on CUDA.
343+
# coords_2 (torch.Tensor): Second set of coordinates (index vectors) on CUDA.
344+
# k (int, optional): Number of nearest neighbors.
345+
# dist (float, optional): Distance threshold.
346+
347+
# Returns:
348+
# torch.Tensor: Edge indices as a PyTorch tensor on CUDA.
349+
# """
350+
351+
# def cupy_to_torch(cupy_array):
352+
# return torch.from_dlpack((cupy_array.toDlpack()))
353+
354+
# # gg
355+
# def torch_to_cupy(tensor):
356+
# return cp.fromDlpack(dlpack.to_dlpack(tensor))
357+
358+
# # Convert PyTorch tensors (CUDA) to CuPy arrays using DLPack
359+
# cp_coords_1 = torch_to_cupy(coords_1).astype(cp.float32)
360+
# cp_coords_2 = torch_to_cupy(coords_2).astype(cp.float32)
361+
# # Define the distance threshold in CuPy
362+
# cp_dist = cp.float32(dist)
363+
# # IndexParams and SearchParams for cagra
364+
# # compression_params = cagra.CompressionParams(pq_bits=pq_bits)
365+
# index_params = cagra.IndexParams(
366+
# metric=metric, nn_descent_niter=nn_descent_niter
367+
# ) # , compression=compression_params)
368+
# search_params = cagra.SearchParams()
369+
# # Build index using CuPy coords
370+
# try:
371+
# index = cagra.build(index_params, cp_coords_1)
372+
# except AttributeError:
373+
# index = cagra.build_index(index_params, cp_coords_1)
374+
# # Perform search to get distances and indices (still in CuPy)
375+
# D, I = cagra.search(search_params, index, cp_coords_2, k)
376+
# # Boolean mask for filtering distances below the squared threshold (all in CuPy)
377+
# valid_mask = cp.asarray(D < cp_dist**2)
378+
# # Vectorized operations for row and valid indices (all in CuPy)
379+
# repeats = valid_mask.sum(axis=1).tolist()
380+
# row_indices = cp.repeat(cp.arange(len(cp_coords_2)), repeats)
381+
# valid_indices = cp.asarray(I)[cp.where(valid_mask)]
382+
# # Stack row indices with valid indices to form edges
383+
# edges = cp.vstack((row_indices, valid_indices)).T
384+
# # Convert the result back to a PyTorch tensor using DLPack
385+
# edge_index = cupy_to_torch(edges).long().contiguous()
386+
# return edge_index
433387

434388

435389
class SpatialTranscriptomicsDataset(InMemoryDataset):

src/segger/prediction/predict_multigpu.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@
1313
from torch_geometric.loader import DataLoader
1414
from torch_geometric.data import Batch
1515
from segger.data.utils import (
16-
get_edge_index_cuda,
1716
get_edge_index,
1817
format_time,
1918
create_anndata,

src/segger/prediction/predict_parquet.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@
1414
from torch_geometric.loader import DataLoader
1515
from torch_geometric.data import Batch
1616
from segger.data.utils import (
17-
get_edge_index_cuda,
1817
get_edge_index,
1918
format_time,
2019
create_anndata,

0 commit comments

Comments
 (0)