@@ -315,15 +315,15 @@ void HSolverLCAO<T, Device>::parakSolve_cusolver(hamilt::Hamilt<T>* pHamilt,
 MPI_Comm_rank (MPI_COMM_WORLD, &world_rank);
 MPI_Comm_size (MPI_COMM_WORLD, &world_size);
 
-// Step 1: Split communicator by shared memory node
+// Split communicator by shared memory node
 MPI_Comm nodeComm;
 MPI_Comm_split_type (MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, world_rank, MPI_INFO_NULL, &nodeComm);
 
 int local_rank, local_size;
 MPI_Comm_rank (nodeComm, &local_rank);
 MPI_Comm_size (nodeComm, &local_size);
 
-// Step 2: Get number of CUDA devices on this node
+// Get number of CUDA devices on this node
 int device_count = 0 ;
 cudaError_t cuda_err = cudaGetDeviceCount (&device_count);
 if (cuda_err != cudaSuccess) {
@@ -334,7 +334,6 @@ void HSolverLCAO<T, Device>::parakSolve_cusolver(hamilt::Hamilt<T>* pHamilt,
 local_rank = -1 ; // Mark as inactive for GPU work
 }
 
-// Step 3: Number of active process on this node
 // Determine the number of MPI processes on this node that can actively use a GPU.
 // This is the minimum of:
 // - The number of available MPI processes on the node (local_size)
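
The diff above sets up a node-local rank-to-GPU mapping: ranks sharing a node are grouped with MPI_COMM_TYPE_SHARED, the node's CUDA devices are counted, and only as many ranks as there are devices stay active for GPU work. The following standalone sketch illustrates that pattern under my own assumptions; it is not the ABACUS implementation, and the printed messages and the simple `local_rank < active` cutoff are illustrative choices only.

```cpp
// Minimal sketch (assumed, not the project's code) of node-local rank-to-GPU binding:
// split MPI_COMM_WORLD by shared-memory node, count the node's CUDA devices,
// and pin each active rank to one device.
#include <mpi.h>
#include <cuda_runtime.h>
#include <cstdio>

int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);

    int world_rank = 0, world_size = 0;
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    // Group the ranks that share a physical node.
    MPI_Comm nodeComm;
    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, world_rank,
                        MPI_INFO_NULL, &nodeComm);

    int local_rank = 0, local_size = 0;
    MPI_Comm_rank(nodeComm, &local_rank);
    MPI_Comm_size(nodeComm, &local_size);

    // Count the GPUs visible on this node; treat a CUDA error as "no devices".
    int device_count = 0;
    if (cudaGetDeviceCount(&device_count) != cudaSuccess) {
        device_count = 0;
    }

    // Only min(local_size, device_count) ranks per node do GPU work,
    // each pinned to its own device.
    const int active = (local_size < device_count) ? local_size : device_count;
    if (local_rank < active) {
        cudaSetDevice(local_rank);
        std::printf("world rank %d -> node-local rank %d -> GPU %d\n",
                    world_rank, local_rank, local_rank);
    } else {
        std::printf("world rank %d (node-local %d) inactive for GPU work\n",
                    world_rank, local_rank);
    }

    MPI_Comm_free(&nodeComm);
    MPI_Finalize();
    return 0;
}
```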