Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions config/ompi_check_ubcl.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# -*- shell-script -*-
#
# Copyright (C) 2015-2017 Mellanox Technologies, Inc.
# All rights reserved.
# Copyright (c) 2015 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# Copyright (c) 2016 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved.
# Copyright (c) 2024-2025 Bull S.A.S. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

# OMPI_CHECK_UBCL(prefix, [action-if-found], [action-if-not-found])
# --------------------------------------------------------
# Check whether UBCL support was requested through --with-ubcl.
# When a directory is given, its include/ subdirectory is appended to
# prefix_CPPFLAGS. Runs action-if-found if UBCL support is enabled,
# otherwise runs action-if-not-found.
AC_DEFUN([OMPI_CHECK_UBCL],[
    OPAL_VAR_SCOPE_PUSH([ompi_check_ubcl_dir ompi_check_ubcl_happy])

    m4_ifblank([$1], [m4_fatal([First argument to OMPI_CHECK_UBCL cannot be blank])])

    AC_ARG_WITH([ubcl],
                [AS_HELP_STRING([--with-ubcl(=DIR)],
                                [Build with UBCL support])])

    # UBCL is dlopen'd to avoid direct link to libubcl.so.
    # OAC_CHECK_PACKAGE would add this explicit link, so it cannot be used.
    # OPAL_CHECK_WITHDIR prints an error if the given path is invalid
    OPAL_CHECK_WITHDIR([ubcl], [$with_ubcl], [include/ubcl_api.h])

    # Use the POSIX "=" operator: "==" is a bashism that makes the test
    # fail under strict shells, which would wrongly enable UBCL.
    # A bare --with-ubcl (value "yes") names no directory, so no -I flag
    # is added and the header is expected in the default search path.
    AS_IF([test "$with_ubcl" = "no"],
          [ompi_check_ubcl_happy="no"],

          [test -z "$with_ubcl"],
          [ompi_check_ubcl_happy="no"],

          [test "$with_ubcl" = "yes"],
          [ompi_check_ubcl_happy="yes"],

          [ompi_check_ubcl_happy="yes"
           $1_CPPFLAGS="${$1_CPPFLAGS} -I$with_ubcl/include/"
           AC_MSG_NOTICE([$1_CPPFLAGS is set to: ${$1_CPPFLAGS}])])


    OPAL_SUMMARY_ADD([Transports],[UBCL],[],[$ompi_check_ubcl_happy])

    AS_IF([test "$ompi_check_ubcl_happy" = "yes"],
          [$2],
          [$3])

    OPAL_VAR_SCOPE_POP
])

94 changes: 94 additions & 0 deletions ompi/mca/common/ubcl/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Copyright (c) 2025 Bull SAS. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

#AM_CPPFLAGS = $(common_ubcl_CPPFLAGS)

# Files that make up the common UBCL library.
common_ubcl_sources = common_ubcl.c common_ubcl.h

# MCA_PROCESS_COMPONENT (config/opal_mca.m4) forces common components to be
# named lib${<PROJECT>_LIB_NAME}mca_common_ubcl.la. OMPI_LIB_NAME does not
# exist, so the project part of the name is empty here. This presumably
# relies on no other project with an empty library name providing a
# common component of the same name.
comp_inst = libmca_common_ubcl.la
comp_noinst = libmca_common_ubcl_noinst.la

# Exactly one flavor is built: the installable library for a DSO build,
# the noinst convenience library otherwise.
if MCA_BUILD_ompi_common_ubcl_DSO
lib_LTLIBRARIES = $(comp_inst)
noinst_LTLIBRARIES =
else
lib_LTLIBRARIES =
noinst_LTLIBRARIES = $(comp_noinst)
endif

# Installable (DSO) flavor of the library.
libmca_common_ubcl_la_SOURCES = $(common_ubcl_sources)
libmca_common_ubcl_la_CFLAGS = $(common_ubcl_CFLAGS)
libmca_common_ubcl_la_CPPFLAGS = $(common_ubcl_CPPFLAGS)
libmca_common_ubcl_la_LDFLAGS = $(common_ubcl_LDFLAGS)
libmca_common_ubcl_la_LIBADD = $(common_ubcl_LIBS) \
    $(OPAL_TOP_BUILDDIR)/opal/mca/common/ubcl/lib@OPAL_LIB_NAME@mca_common_ubcl.la

# Noinst (static) flavor. It compiles the very same sources, so it needs the
# same preprocessor/compiler flags; without them the UBCL include path is
# missing in static builds.
# NOTE(review): the OPAL common library is deliberately not added to LIBADD
# here, assuming it is rolled into the top-level library in a static build --
# confirm.
libmca_common_ubcl_noinst_la_SOURCES = $(common_ubcl_sources)
libmca_common_ubcl_noinst_la_CFLAGS = $(common_ubcl_CFLAGS)
libmca_common_ubcl_noinst_la_CPPFLAGS = $(common_ubcl_CPPFLAGS)
libmca_common_ubcl_noinst_la_LDFLAGS = $(common_ubcl_LDFLAGS)
libmca_common_ubcl_noinst_la_LIBADD = $(common_ubcl_LIBS)

# Conditionally install the header files.
# When WANT_INSTALL_HEADERS is enabled, common_ubcl.h is installed into
# $(ompiincludedir)/$(subdir); otherwise no header is installed from here.

if WANT_INSTALL_HEADERS
ompidir = $(ompiincludedir)/$(subdir)
ompi_HEADERS = common_ubcl.h
endif


# This library is linked against various MCA components.
# There are two cases:
#
# 1. libmca_common_ubcl.la is a shared library. By linking that shared
# library to all components that need it, the OS linker will
# automatically load it into the process as necessary, and there will
# only be one copy (i.e., all the components will share *one* copy of
# the code and data).
#
# 2. libmca_common_ubcl.la is a static library. In this case, it will
# be rolled up into the top-level libmpi.la. It will also be rolled
# into each component, but then the component will also be rolled up
# into the upper-level libmpi.la. Linkers universally know how to
# "figure this out" so that we end up with only one copy of the code
# and data.
#
# As per above, we'll either have an installable or noinst result.
# The installable one should follow the same MCA prefix naming rules
# (i.e., libmca_<type>_<name>.la). The noinst one can be named
# whatever it wants, although libmca_<type>_<name>_noinst.la is
# recommended.

# To simplify components that link to this library, we will *always*
# have an output libtool library named libmca_<type>_<name>.la -- even
# for case 2) described above (i.e., so there's no conditional logic
# necessary in component Makefile.am's that link to this library).
# Hence, if we're creating a noinst version of this library (i.e.,
# case 2), we sym link it to the libmca_<type>_<name>.la name
# (libtool will do the Right Things under the covers). See the
# all-local and clean-local rules, below, for how this is effected.
# The all-local rule symlinks the "noinst" libtool library filename
# to the installable libtool library filename when this component is
# compiled statically (case 2, described above); the clean-local rule
# removes that symlink again.
# Silent-rules plumbing for the symlink step below: with V=0 only a short
# "LN_S" line is printed; any other V value echoes the full command.
# Variable references are written as $(NAME): a bare $AM_DEFAULT_VERBOSITY
# would be parsed by make as $(A) followed by literal text, so the
# empty-V fallback could never resolve.
V=0
OMPI_V_LN_SCOMP = $(ompi__v_LN_SCOMP_$(V))
ompi__v_LN_SCOMP_ = $(ompi__v_LN_SCOMP_$(AM_DEFAULT_VERBOSITY))
ompi__v_LN_SCOMP_0 = @echo " LN_S " `basename $(comp_inst)`;

# In a static build (no installable library) expose the noinst library under
# the installable name, so that dependent components can always link against
# $(comp_inst). The link is only created if the stale one was removed.
all-local:
	$(OMPI_V_LN_SCOMP) if test -z "$(lib_LTLIBRARIES)"; then \
	  rm -f "$(comp_inst)" && \
	  $(LN_S) "$(comp_noinst)" "$(comp_inst)"; \
	fi

# Remove the symlink created by all-local; it only exists when no
# installable library is being built.
clean-local:
	test -n "$(lib_LTLIBRARIES)" || rm -f "$(comp_inst)"
173 changes: 173 additions & 0 deletions ompi/mca/common/ubcl/common_ubcl.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2025 Bull SAS. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/

#include <execinfo.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include "ompi/communicator/communicator.h"
#include "ompi/errhandler/errhandler.h"
#include "ompi/include/mpi.h"
#include "ompi/runtime/mpiruntime.h"
#include "ompi/mca/common/ubcl/common_ubcl.h"
#include "ompi/mca/pml/ubcl/pml_ubcl.h"
#include "ompi/mca/pml/pml_constants.h"
#include "opal/mca/common/ubcl/common_ubcl.h"
#include "opal/util/output.h"

/* Default ompi_common_ubcl values.
 * n_addr is the maximum number of stack frames captured with backtrace()
 * when _mca_common_ubcl_error() builds its error message. */
mca_ompi_common_ubcl_component_t mca_ompi_common_ubcl_component = {
.n_addr = 32,
};

/**
 * Look up the position of a world rank inside the communicator's PML
 * translation array.
 *
 * @param comm  communicator whose c_pml_comm translation array is searched
 * @param wrank world rank to look for
 *
 * @return index of wrank in the array; 0 after reporting an error when the
 *         rank is not present. The process is aborted if the communicator
 *         has no translation array.
 */
static int mca_common_ubcl_find_rank(const struct ompi_communicator_t *comm, const uint64_t wrank)
{
    mca_pml_ubcl_comm_t *translation = comm->c_pml_comm;
    uint32_t idx = 0;

    if (NULL == translation) {
        common_ubcl_error("UBCL error: no translation array in comm");
        abort();
    }

    /* Linear scan: the first matching entry wins */
    while (idx < translation->size) {
        if (wrank == translation->array[idx]) {
            return idx;
        }
        idx++;
    }

    common_ubcl_error("UBCL error irank translation");

    return 0;
}

/**
 * Resolve the MPI rank to report for a message.
 *
 * @param rank      rank requested by the caller, possibly OMPI_ANY_SOURCE
 * @param comm      communicator used for the translation
 * @param ubcl_rank UBCL rank of the remote peer
 *
 * @return rank itself when it is a concrete rank, otherwise the rank found
 *         for ubcl_rank in comm's translation array.
 */
int mca_common_ubcl_get_mpi_rank(const int rank, const struct ompi_communicator_t *comm,
                                 const uint64_t ubcl_rank)
{
    /* Only a wildcard source needs to be translated */
    if (OMPI_ANY_SOURCE != rank) {
        return rank;
    }

    return mca_common_ubcl_find_rank(comm, ubcl_rank);
}

/**
 * Fill an MPI status from a UBCL status.
 *
 * @param status      MPI status to fill; nothing is done for MPI_STATUS_IGNORE
 * @param ubcl_status UBCL status providing size, tag and remote rank
 * @param comm        communicator used to translate the remote rank
 * @param rank        rank requested by the caller, possibly OMPI_ANY_SOURCE
 */
void mca_common_ubcl_status_to_ompi(ompi_status_public_t *status,
                                    ubcl_status_t ubcl_status,
                                    struct ompi_communicator_t *comm, int rank)
{
    if (MPI_STATUS_IGNORE == status) {
        return;
    }

    /* TODO output the information of cancel */
    status->_cancelled = 0;
    status->_ucount = ubcl_status.size;
    status->MPI_TAG = (int) ubcl_status.tag;
    status->MPI_SOURCE = mca_common_ubcl_get_mpi_rank(rank, comm, ubcl_status.remote);
}

/**
 * Translate a UBCL error code into the matching Open MPI error code.
 *
 * @param code UBCL error code
 *
 * @return the corresponding OPAL_* code (MPI_ERR_TRUNCATE for a truncation);
 *         OPAL_ERROR for any code without a dedicated mapping.
 */
int ubcl_error_to_ompi(ubcl_error_t code)
{
    switch (code) {
    case UBCL_SUCCESS:
        return OPAL_SUCCESS;
    case UBCL_ERROR:
        return OPAL_ERROR;
    case UBCL_ERR_RESOURCE_BUSY:
        return OPAL_ERR_RESOURCE_BUSY;
    case UBCL_ERR_OUT_OF_RESOURCE:
        return OPAL_ERR_OUT_OF_RESOURCE;
    case UBCL_ERR_NOT_IMPLEMENTED:
        return OPAL_ERR_NOT_IMPLEMENTED;
    case UBCL_ERR_NOT_AVAILABLE:
        return OPAL_ERR_NOT_AVAILABLE;
    case UBCL_ERR_TEMP_OUT_OF_RESOURCE:
        return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
    case UBCL_ERR_ARG_INVALID:
        return OPAL_ERR_BAD_PARAM;
    case UBCL_ERR_TOO_LATE:
        return OPAL_ERR_TIMEOUT;
    case UBCL_ERR_TRUNCATE:
        return MPI_ERR_TRUNCATE;
    default:
        /* Unknown codes are reported as a generic error */
        return OPAL_ERROR;
    }
}

/**
 * Append printf-style text (va_list flavor) to a bounded message buffer.
 *
 * @param buf    destination buffer, always left NUL-terminated
 * @param limit  number of bytes of buf that may be used, terminator included
 * @param used   in/out: number of characters currently stored in buf
 * @param format printf-style format string
 * @param args   arguments matching format
 *
 * vsnprintf() returns the length the text would have had without
 * truncation, so the position is clamped instead of blindly advanced.
 */
static void common_ubcl_msg_vappend(char *buf, size_t limit, size_t *used,
                                    const char *format, va_list args)
{
    size_t room;
    int written;

    if (*used + 1 >= limit) {
        /* No room left for anything but the terminator */
        return;
    }

    room = limit - *used;
    written = vsnprintf(buf + *used, room, format, args);
    if (written < 0) {
        /* Formatting error: keep what was already there */
        buf[*used] = '\0';
    } else if ((size_t) written >= room) {
        /* Truncated: stay on the terminator that vsnprintf wrote */
        *used = limit - 1;
    } else {
        *used += (size_t) written;
    }
}

/** Variadic convenience wrapper around common_ubcl_msg_vappend(). */
static void common_ubcl_msg_append(char *buf, size_t limit, size_t *used,
                                   const char *format, ...)
{
    va_list args;

    va_start(args, format);
    common_ubcl_msg_vappend(buf, limit, used, format, args);
    va_end(args);
}

/**
 * Report an error or a warning raised by a UBCL component, followed by the
 * current call stack, and abort the job when requested.
 *
 * @param filename     source file where the problem was detected
 * @param line         line number in filename
 * @param err          error code to report
 * @param abort        non-zero: report as ERROR, invoke the error handler of
 *                     MPI_COMM_WORLD and abort; zero: report as WARNING
 * @param verbose      verbosity level of the message
 * @param output       opal_output stream id
 * @param is_init      non-zero when the output stream can be used
 * @param comp_verbose verbosity configured for the calling component
 * @param comp_name    name of the calling component
 * @param format       printf-style description, followed by its arguments
 *
 * The message goes to opal_output_verbose() when is_init is set and output
 * is positive; otherwise it goes to stderr when aborting or when
 * comp_verbose >= verbose. If memory cannot be obtained, a short message
 * without the description and stack is printed instead.
 */
void _mca_common_ubcl_error(char *filename, int line, int err,
                            char abort, int verbose, int output,
                            int is_init, int comp_verbose,
                            char *comp_name, char *format, ...)
{
    const size_t char_per_line = 256;
    const int max_frames = mca_ompi_common_ubcl_component.n_addr;
    int n_addr = 0;
    int out_of_memory = 0;
    void **stack_buffer = NULL;
    char **stack = NULL;
    char *msg = NULL;
    size_t n_char = 0;

    /* Capture the call stack; a non-positive limit simply disables it */
    if (max_frames > 0) {
        stack_buffer = malloc(sizeof(void *) * (size_t) max_frames);
        if (NULL == stack_buffer) {
            out_of_memory = 1;
        } else {
            n_addr = backtrace(stack_buffer, max_frames);
        }
    }
    if (n_addr > 0) {
        stack = backtrace_symbols(stack_buffer, n_addr);
        if (NULL == stack) {
            out_of_memory = 1;
            n_addr = 0;
        }
    } else {
        n_addr = 0;
    }

    /* One bounded line per frame plus room for the header and description */
    n_char = char_per_line * (size_t) n_addr + 1024;
    msg = malloc(n_char);
    if (NULL == msg) {
        out_of_memory = 1;
    }

    if (out_of_memory) {
        /* Output small error */
        opal_output_verbose(verbose, output,
                            "========\n== ERROR: Not enough memory while outputting error...\n== "
                            "%s encountered an error (%d) at %s:%d\n========\n",
                            comp_name, err, filename, line);
    } else {
        /* Output full error */
        size_t current = 0;
        va_list arglist;

        msg[0] = '\0';
        common_ubcl_msg_append(msg, n_char, &current,
                               "========\n== %s encountered an error (%d) at %s:%d\n== %s:\n\t",
                               comp_name, err, filename, line, abort ? "ERROR" : "WARNING");

        va_start(arglist, format);
        common_ubcl_msg_vappend(msg, n_char, &current, format, arglist);
        va_end(arglist);

        common_ubcl_msg_append(msg, n_char, &current, "\n== STACK:\n");

        for (int i = 0; i < n_addr; i++) {
            /* Cap each frame at char_per_line bytes; the newline is appended
             * separately so a truncated frame never swallows it */
            size_t line_limit = current + char_per_line;
            if (line_limit > n_char) {
                line_limit = n_char;
            }
            common_ubcl_msg_append(msg, line_limit, &current, "= [%2d] %s", i, stack[i]);
            common_ubcl_msg_append(msg, n_char, &current, "\n");
        }

        if (is_init && output > 0) {
            opal_output_verbose(verbose, output,
                                "%s========", msg);
        } else if (abort || comp_verbose >= verbose) {
            fprintf(stderr, "%s\n", msg);
            fflush(stderr);
        }
    }

    if (abort) {
        /* stack may be missing (allocation failure or no frame captured) */
        const char *where = (n_addr > 0) ? stack[0] : "(backtrace unavailable)";

        OMPI_ERRHANDLER_INVOKE(&ompi_mpi_comm_world.comm, err, where);
        ompi_mpi_abort(&ompi_mpi_comm_world.comm, err);
    }

    free(stack_buffer);
    free(stack);
    free(msg);
}
44 changes: 44 additions & 0 deletions ompi/mca/common/ubcl/common_ubcl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2025 Bull SAS. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/

#ifndef OMPI_MCA_COMMON_UBCL_H
#define OMPI_MCA_COMMON_UBCL_H

#include <stddef.h>
#include <stdint.h> /* uint64_t used in the prototypes below */

#include "ompi/communicator/communicator.h"
#include "ompi/include/mpi.h"
#include "opal/mca/common/ubcl/common_ubcl.h"

/* Holds common variables used in multiple UBCL components */
struct mca_ompi_common_ubcl_component_s {
    int n_addr; /**< Max number of void * addresses in printed stack */
};
typedef struct mca_ompi_common_ubcl_component_s mca_ompi_common_ubcl_component_t;
extern mca_ompi_common_ubcl_component_t mca_ompi_common_ubcl_component;

/* Return rank unless it is OMPI_ANY_SOURCE, in which case ubcl_rank is
 * translated through the communicator's translation array */
int mca_common_ubcl_get_mpi_rank(const int rank, const struct ompi_communicator_t *comm,
                                 const uint64_t ubcl_rank);

/* Fill status (unless it is MPI_STATUS_IGNORE) from ubcl_status */
void mca_common_ubcl_status_to_ompi(ompi_status_public_t *status,
                                    ubcl_status_t ubcl_status,
                                    struct ompi_communicator_t *comm, int rank);

/* Convert a UBCL error code into the matching Open MPI error code */
int ubcl_error_to_ompi(ubcl_error_t code);

/* UBCL rank is on 61 bits, ompi jobid is 32 bits, so the vpid must be
 * truncated to the remaining 29 bits */
#define COMMON_UBCL_VPID_MAX ((1 << 29) - 1)
#define PML_UBCL_JOBID_MAX (OPAL_JOBID_MAX)

/* Error and warning output function used by UBCL components.
 * format is a printf-style format string followed by its arguments. */
void _mca_common_ubcl_error(char *filename, int line, int err, char abort, int verbose,
                            int output, int is_init, int comp_verbose, char *comp_name,
                            char *format, ...);


#endif /* OMPI_MCA_COMMON_UBCL_H */

Loading
Loading