From e68ede2c53a241b3f85a0b53551d6c1b33709059 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Mon, 13 Nov 2023 10:12:21 -0700 Subject: [PATCH 1/2] Correctly access the communicator name is MSGQ. We transformed the communicator c_name field from an array into a pointer and fialed to update the MSGQ accessors to the field. Thanks @david-edwards-linaro for the report and the initial patch. Fixes #12063. Signed-off-by: George Bosilca (cherry picked from commit 877ee24c299ce64ef118bb1eeaee5154ceb85956) --- ompi/debuggers/ompi_mpihandles_dll.c | 19 +++++++++++++++++-- ompi/debuggers/ompi_msgq_dll.c | 24 +++++++++++++++++++++--- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/ompi/debuggers/ompi_mpihandles_dll.c b/ompi/debuggers/ompi_mpihandles_dll.c index cbded9bbc04..6ad92a974c8 100644 --- a/ompi/debuggers/ompi_mpihandles_dll.c +++ b/ompi/debuggers/ompi_mpihandles_dll.c @@ -7,7 +7,9 @@ * Copyright (c) 2012-2013 Inria. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2017 IBM Corp. All rights reserved. + * Copyright (c) 2017 IBM Corp. All rights reserved. + * Copyright (c) 2023 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -458,8 +460,21 @@ int mpidbg_comm_query(mqs_image *image, mqs_image_info *image_info, (*info)->comm_c_handle = c_comm; printf("mpidbg_comm_query: %p\n", (void*) c_comm); - mqs_fetch_data(process, c_comm + i_info->ompi_communicator_t.offset.c_name, + mqs_taddr_t name_addr = ompi_fetch_pointer( process, + c_comm + i_info->ompi_communicator_t.offset.c_name, + p_info ); + mqs_fetch_data(process, name_addr, MPIDBG_MAX_OBJECT_NAME, (*info)->comm_name); + (*info)->comm_name[MPIDBG_MAX_OBJECT_NAME-1] = '\0'; + /* Defensively zero anything beyond the actual name. + + We know that MPIDBG_MAX_OBJECT_NAME == MPI_MAX_OBJECT_NAME (per + mpihandles_interface.h), and OMPI *guarantees* that + (*info)->comm_name will be both \0-terminated, and have a + maximum of (MPI_MAX_OBJECT_NAME-1) non-NULL characters. So the + memset length expression below is guaranted be >=0. */ + memset((*info)->comm_name + strlen((*info)->comm_name), 0, + MPIDBG_MAX_OBJECT_NAME - 1 - strlen((*info)->comm_name)); /* Get this process' rank in the comm */ (*info)->comm_rank = ompi_fetch_int(process, diff --git a/ompi/debuggers/ompi_msgq_dll.c b/ompi/debuggers/ompi_msgq_dll.c index 4516b8df23f..5779140979c 100644 --- a/ompi/debuggers/ompi_msgq_dll.c +++ b/ompi/debuggers/ompi_msgq_dll.c @@ -10,6 +10,8 @@ * Copyright (c) 2016 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2023 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -682,8 +684,18 @@ static int rebuild_communicator_list (mqs_process *proc) p_info ); old->group = find_or_create_group( proc, group_base ); } - mqs_fetch_data( proc, comm_ptr + i_info->ompi_communicator_t.offset.c_name, - 64, old->comm_info.name ); + mqs_taddr_t name_addr = ompi_fetch_pointer( proc, + comm_ptr + i_info->ompi_communicator_t.offset.c_name, + p_info ); + /* c_name can be up to MPI_MAX_OBJECT_NAME bytes, but we only + * copy the first (sizeof(old->comm_info.name)-1) here. Make + * sure the string is correctly terminated. */ + size_t target_size = sizeof(old->comm_info.name); + mqs_fetch_data( proc, name_addr, target_size, old->comm_info.name ); + old->comm_info.name[target_size - 1] = '\0'; + /* Defensively zero anything beyond the actual name */ + size_t src_strlen = strlen(old->comm_info.name); + memset(old->comm_info.name + src_strlen, 0, target_size - 1 - src_strlen); if( NULL != old->group ) { old->comm_info.size = old->group->entries; @@ -1156,8 +1168,9 @@ static int fetch_request( mqs_process *proc, mpi_process_info *p_info, ompi_datatype + i_info->ompi_datatype_t.offset.size, p_info ); /* Be user friendly, show the datatype name */ + size_t data_name_size = sizeof(data_name); mqs_fetch_data( proc, ompi_datatype + i_info->ompi_datatype_t.offset.name, - 64, data_name ); + data_name_size, data_name ); if( '\0' != data_name[0] ) { // res->extra_text[x] is only 64 chars long -- same as // data_name. If you try to snprintf it into @@ -1171,6 +1184,11 @@ static int fetch_request( mqs_process *proc, mpi_process_info *p_info, (int)res->desired_length); snprintf( (char*)res->extra_text[2], 64, "%s", data_name ); + } else { + data_name[data_name_size - 1] = '\0'; + /* Be nice and zero anything beyond the actual name */ + size_t data_name_strlen = strlen(data_name); + memset(data_name + data_name_strlen, 0, data_name_size - 1 - data_name_strlen); } /* And now compute the real length as specified by the user */ res->desired_length *= From e0b5ae2ceb6075102839fb12655a7afc15e1cebc Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Thu, 16 Nov 2023 10:59:51 -0500 Subject: [PATCH 2/2] ompi/debuggers: Fix a comment and remove a debug printf Signed-off-by: Jeff Squyres (cherry picked from commit ed4b78d762e855c88951d921ff6d7ae72fae44d3) --- ompi/debuggers/ompi_mpihandles_dll.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ompi/debuggers/ompi_mpihandles_dll.c b/ompi/debuggers/ompi_mpihandles_dll.c index 6ad92a974c8..aabb803a8bb 100644 --- a/ompi/debuggers/ompi_mpihandles_dll.c +++ b/ompi/debuggers/ompi_mpihandles_dll.c @@ -454,12 +454,11 @@ int mpidbg_comm_query(mqs_image *image, mqs_image_info *image_info, if (NULL == *info) { return MPIDBG_ERR_NO_MEM; } - /* JMS temporarily zero everything out. Remove this when we fill - in all the fields */ + /* Zero everything out. Remove this when we fill in all the + fields */ memset(*info, 0, sizeof(struct mpidbg_comm_info_t)); (*info)->comm_c_handle = c_comm; - printf("mpidbg_comm_query: %p\n", (void*) c_comm); mqs_taddr_t name_addr = ompi_fetch_pointer( process, c_comm + i_info->ompi_communicator_t.offset.c_name, p_info );