Skip to content

Commit 0e26017

Browse files
Merge pull request #648 from lattice/hotfix/p2p_performance_rank
Add support for P2P performance ranks
2 parents da34069 + 48ea768 commit 0e26017

File tree

1 file changed

+14
-2
lines changed

1 file changed

+14
-2
lines changed

lib/comm_mpi.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,11 +150,23 @@ void comm_peer2peer_init(const char* hostname_recv_buf)
150150
cudaDeviceCanAccessPeer(&canAccessPeer[0], gpuid, neighbor_gpuid);
151151
cudaDeviceCanAccessPeer(&canAccessPeer[1], neighbor_gpuid, gpuid);
152152

153+
// cudaDeviceGetP2PAttribute (used to query the P2P performance rank) was introduced with CUDA 8
154+
#if CUDA_VERSION >= 8000
155+
int accessRank[2];
156+
cudaDeviceGetP2PAttribute(&accessRank[0], cudaDevP2PAttrPerformanceRank, gpuid, neighbor_gpuid);
157+
cudaDeviceGetP2PAttribute(&accessRank[1], cudaDevP2PAttrPerformanceRank, neighbor_gpuid, gpuid);
158+
#endif
159+
153160
if(canAccessPeer[0]*canAccessPeer[1]){
154161
peer2peer_enabled[dir][dim] = true;
155162
if (getVerbosity() > QUDA_SILENT)
156-
printf("Peer-to-peer enabled for rank %d (gpu=%d) with neighbor %d (gpu=%d) dir=%d, dim=%d\n",
157-
comm_rank(), gpuid, neighbor_rank, neighbor_gpuid, dir, dim);
163+
printf("Peer-to-peer enabled for rank %d (gpu=%d) with neighbor %d (gpu=%d) dir=%d, dim=%d, performance rank = (%d, %d)\n",
164+
#if CUDA_VERSION >= 8000
165+
comm_rank(), gpuid, neighbor_rank, neighbor_gpuid, dir, dim, accessRank[0], accessRank[1]);
166+
#else
167+
// the performance rank cannot be queried before CUDA 8, so report 0 for both directions
168+
comm_rank(), gpuid, neighbor_rank, neighbor_gpuid, dir, dim, 0, 0);
169+
#endif
158170
}
159171
} // on the same node
160172
} // different dimensions - x, y, z, t

0 commit comments

Comments
 (0)