Skip to content

Commit 15e0508

Browse files
raffenet authored and hzhou committed
ch4/ipc: Add debug logging for sender ipc handle cache
Add logging for handle cache insertion and removal as well as hits and misses when searching. This could be useful for debugging handle cache issues in the field.
1 parent 7eb9f89 commit 15e0508

File tree

4 files changed

+9
-0
lines changed

4 files changed

+9
-0
lines changed

src/mpid/ch4/shm/ipc/gpu/gpu_post.c

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -206,9 +206,11 @@ static int ipc_track_cache_search(MPL_gavl_tree_t gavl_tree, const void *addr, u
206206
void *obj = MPL_gavl_tree_search(gavl_tree, addr, len);
207207

208208
if (obj) {
209+
MPL_DBG_MSG_P(MPIDI_CH4_DBG_IPC, VERBOSE, "cached gpu ipc handle HIT for %p", addr);
209210
*handle_out = *((MPL_gpu_ipc_mem_handle_t *) obj);
210211
*found = true;
211212
} else {
213+
MPL_DBG_MSG_P(MPIDI_CH4_DBG_IPC, VERBOSE, "cached gpu ipc handle MISS for %p", addr);
212214
*found = false;
213215
}
214216

@@ -220,6 +222,8 @@ static int ipc_track_cache_insert(MPL_gavl_tree_t gavl_tree, const void *addr, u
220222
{
221223
int mpi_errno = MPI_SUCCESS;
222224

225+
MPL_DBG_MSG_P(MPIDI_CH4_DBG_IPC, VERBOSE, "caching NEW gpu ipc handle for %p", addr);
226+
223227
MPL_gpu_ipc_mem_handle_t *cache_obj = MPL_malloc(sizeof(handle), MPL_MEM_OTHER);
224228
MPIR_ERR_CHKANDJUMP(!cache_obj, mpi_errno, MPI_ERR_OTHER, "**nomem");
225229

@@ -241,6 +245,8 @@ static int ipc_track_cache_remove(const void *addr, uintptr_t len, int local_dev
241245
int mpi_errno = MPI_SUCCESS;
242246
int mpl_err;
243247

248+
MPL_DBG_MSG_P(MPIDI_CH4_DBG_IPC, VERBOSE, "removing STALE gpu ipc handle for %p", addr);
249+
244250
for (int i = 0; i < MPIR_Process.local_size; ++i) {
245251
MPL_gavl_tree_t track_tree = MPIDI_GPUI_global.ipc_handle_track_trees[i][local_dev_id];
246252
mpl_err = MPL_gavl_tree_delete_range(track_tree, addr, len);

src/mpid/ch4/src/ch4_globals.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -163,4 +163,5 @@ MPL_dbg_class MPIDI_CH4_DBG_GENERAL;
163163
MPL_dbg_class MPIDI_CH4_DBG_MAP;
164164
MPL_dbg_class MPIDI_CH4_DBG_COMM;
165165
MPL_dbg_class MPIDI_CH4_DBG_MEMORY;
166+
MPL_dbg_class MPIDI_CH4_DBG_IPC;
166167
#endif

src/mpid/ch4/src/ch4_init.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -445,6 +445,7 @@ int MPID_Init(int requested, int *provided)
445445
MPIDI_CH4_DBG_MAP = MPL_dbg_class_alloc("CH4_MAP", "ch4_map");
446446
MPIDI_CH4_DBG_COMM = MPL_dbg_class_alloc("CH4_COMM", "ch4_comm");
447447
MPIDI_CH4_DBG_MEMORY = MPL_dbg_class_alloc("CH4_MEMORY", "ch4_memory");
448+
MPIDI_CH4_DBG_IPC = MPL_dbg_class_alloc("CH4_IPC", "ch4_ipc");
448449
#endif
449450

450451
#ifdef HAVE_SIGNAL

src/mpid/ch4/src/ch4_types.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -300,6 +300,7 @@ extern MPL_dbg_class MPIDI_CH4_DBG_GENERAL;
300300
extern MPL_dbg_class MPIDI_CH4_DBG_MAP;
301301
extern MPL_dbg_class MPIDI_CH4_DBG_COMM;
302302
extern MPL_dbg_class MPIDI_CH4_DBG_MEMORY;
303+
extern MPL_dbg_class MPIDI_CH4_DBG_IPC;
303304
#endif
304305

305306
#endif /* CH4_TYPES_H_INCLUDED */

0 commit comments

Comments
 (0)