Commit fcbeb75

Commit message: don't know but changed predict_parquet

1 parent b210ce4

File tree: 1 file changed (+13, -6)

src/segger/prediction/predict_parquet.py

Lines changed: 13 additions & 6 deletions
@@ -26,7 +26,7 @@
 from segger.prediction.boundary import generate_boundaries

 from scipy.sparse.csgraph import connected_components as cc
-from typing import Union, Dict
+from typing import Union, Dict, Tuple
 import dask.dataframe as dd
 from dask import delayed
 from dask.diagnostics import ProgressBar
@@ -287,14 +287,19 @@ def sparse_multiply(embeddings, edge_index, shape) -> coo_matrix:
     # shape = batch[from_type].x.shape[0], batch[to_type].x.shape[0]
     indices = torch.argwhere(edge_index != -1).T
     indices[1] = edge_index[edge_index != -1]
-    rows = cp.fromDlpack(to_dlpack(indices[0, :].to("cuda")))
-    columns = cp.fromDlpack(to_dlpack(indices[1, :].to("cuda")))
+    indices_gpu = indices.to("cuda")  # Keep a live reference to the CUDA tensor
+    rows = cp.fromDlpack(to_dlpack(indices_gpu[0, :]))
+    columns = cp.fromDlpack(to_dlpack(indices_gpu[1, :]))
+    del indices_gpu  # Drop the reference only after the CuPy views exist
+    stream = cp.cuda.get_current_stream()
+    stream.synchronize()  # Block until queued GPU work completes
     # print(rows)
     del indices
     values = similarity[edge_index != -1].flatten()
     sparse_result = coo_matrix(
         (cp.fromDlpack(to_dlpack(values)), (rows, columns)), shape=shape
     )
+    stream.synchronize()
     return sparse_result
     # Free GPU memory after computation

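Why this hunk is shaped the way it is: cp.fromDlpack(to_dlpack(t)) hands CuPy a zero-copy view of t's CUDA buffer, and the original code built that view from an unnamed temporary (indices[0, :].to("cuda")). Below is a minimal sketch of the defensive pattern the commit adopts, with a toy edge_index standing in for the real data: bind the tensor to a name for as long as the views are being created, then synchronize the current stream before downstream code reuses the memory.

import torch
import cupy as cp
from torch.utils.dlpack import to_dlpack

# Toy stand-in for the real edge_index; -1 marks a missing neighbor.
edge_index = torch.tensor([[0, 1, -1], [2, -1, 3]], device="cuda")

indices = torch.argwhere(edge_index != -1).T
indices_gpu = indices.to("cuda")  # named, long-lived reference
rows = cp.fromDlpack(to_dlpack(indices_gpu[0, :]))     # zero-copy CuPy view
columns = cp.fromDlpack(to_dlpack(indices_gpu[1, :]))  # zero-copy CuPy view
del indices_gpu  # drop the name only after both views exist

cp.cuda.get_current_stream().synchronize()  # drain queued GPU work
print(rows, columns)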
@@ -364,21 +369,23 @@ def _get_id():
     # Convert sparse matrix to dense format (on GPU)
     dense_scores = scores.toarray()  # Convert to a dense CuPy array
     del scores  # Remove from memory
-    cp.get_default_memory_pool().free_all_blocks()  # Free CuPy memory
+    cp.cuda.Stream.null.synchronize()
+    # cp.get_default_memory_pool().free_all_blocks()  # Free CuPy memory

     # Step 2: Maximize score and assign transcripts based on score threshold
     belongs = cp.max(dense_scores, axis=1)  # Max score per transcript
     assignments["score"] = cp.asnumpy(belongs)  # Move back to CPU

     mask = assignments["score"] >= score_cut  # Mask for assigned transcripts
+    cp.cuda.Stream.null.synchronize()
     all_ids = np.concatenate(batch["bd"].id)  # Boundary IDs as NumPy array
     assignments["segger_cell_id"] = np.where(
         mask, all_ids[cp.argmax(dense_scores, axis=1).get()], None
     )

     # Clear memory after score processing
     del dense_scores
-    cp.get_default_memory_pool().free_all_blocks()  # Free CuPy memory
+    # cp.get_default_memory_pool().free_all_blocks()  # Free CuPy memory
     torch.cuda.empty_cache()

     assignments["bound"] = np.where(
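For context on the two cp.cuda.Stream.null.synchronize() calls above: synchronizing the null stream blocks the host until all work queued on CuPy's default stream has finished, while the now-commented cp.get_default_memory_pool().free_all_blocks() returns cached, currently unused device memory to the driver. The commit trades the eager frees for explicit synchronization before host-side reads. A minimal sketch of that ordering, with a toy array standing in for dense_scores:

import cupy as cp

scores = cp.random.rand(4, 3)      # toy stand-in for dense_scores

belongs = cp.max(scores, axis=1)   # kernel launch; runs asynchronously
cp.cuda.Stream.null.synchronize()  # host waits for queued work to finish
host_best = cp.asnumpy(belongs)    # device-to-host copy of the results

best_col = cp.argmax(scores, axis=1).get()  # .get() likewise copies to host
print(host_best, best_col)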
@@ -470,7 +477,7 @@ def _get_id():
     delayed_write_output_ddf.persist()  # Schedule writing

     # Free memory after computation
-    cp.get_default_memory_pool().free_all_blocks()  # Free CuPy memory
+    # cp.get_default_memory_pool().free_all_blocks()  # Free CuPy memory
     torch.cuda.empty_cache()

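A closing note on the cleanup calls (context, not part of the diff): PyTorch and CuPy each run their own caching allocator on the device, so fully releasing cached memory takes one call per library, and neither call touches memory still held by live arrays or tensors. The commit keeps the torch side and comments out the CuPy side; the full pair looks like this:

import cupy as cp
import torch

# Each library caches device allocations independently; neither call
# frees memory that live arrays or tensors still own.
cp.get_default_memory_pool().free_all_blocks()  # release CuPy's cached blocks
torch.cuda.empty_cache()                        # release PyTorch's cached blocks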