Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions buildlib/pr/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,32 @@ stages:
dependsOn: [Basic_compile]
jobs:
- template: cuda/cuda.yml
- job: soname_suffix_cuda_ib
displayName: SONAME suffix and deepbind on CUDA/IB
pool:
name: MLNX
demands:
- ucx_gpu_test -equals yes
container: ubuntu2404_doca31_gpunetio
workspace:
clean: all
timeoutInMinutes: 120

steps:
- checkout: self
clean: true
fetchDepth: 100
retryCountOnTaskFailure: 5

- bash: |
./buildlib/tools/builds.sh
displayName: Build SONAME suffix with CUDA/IB
env:
BUILD_ID: "$(Build.BuildId)-$(Build.BuildNumber)"
build_mode: soname_suffix
soname_suffix_check_hw: yes
ucx_gpu: yes
EXECUTOR_NUMBER: $(AZP_AGENT_ID)


- stage: AddressSanitizer
Expand Down
10 changes: 8 additions & 2 deletions buildlib/tools/builds.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ build_mode=${build_mode:-}
build_mode=${build_mode:-long}

case "${build_mode}" in
long|short|sanity|compilers)
long|short|sanity|compilers|soname_suffix)
;;
*)
azure_log_error "Unsupported build mode: ${build_mode}"
Expand Down Expand Up @@ -507,6 +507,8 @@ check_no_gga() {
fi
}

source ${realdir}/soname-build.sh

az_init_modules
prepare_build

Expand Down Expand Up @@ -537,11 +539,15 @@ long)
'build_no_openmp' \
'build_gcc_debug_opt_with_dndebug' \
'build_clang' \
'build_armclang')
'build_armclang'\
'build_soname_suffix')
;;
compilers)
tests=('build_icc' 'build_pgi')
;;
soname_suffix)
tests=('build_soname_suffix')
;;
esac

num_tests=${#tests[@]}
Expand Down
202 changes: 202 additions & 0 deletions buildlib/tools/soname-build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
#
# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# See file LICENSE for terms.
#

# Verify that a shared library exists and carries the expected ELF SONAME.
#
# Arguments:
#   $1 - path to the shared library file
#   $2 - expected SONAME, compared as a literal string
# On failure, reports through azure_log_error and exits the shell with 1.
check_elf_soname() {
    local lib_path=$1
    local soname=$2

    if [ ! -f "$lib_path" ]; then
        azure_log_error "Missing library $lib_path"
        exit 1
    fi

    # -F: the SONAME contains dots, which must not act as regex wildcards
    if ! readelf -d "$lib_path" | grep -qF "Library soname: [$soname]"; then
        azure_log_error "Library $lib_path does not have SONAME $soname"
        exit 1
    fi
}

# Verify that an ELF binary exists and lists the given library as a
# dynamic dependency (a "Shared library" entry in `readelf -d` output).
#
# Arguments:
#   $1 - path to the binary or shared library to inspect
#   $2 - expected dependency name, compared as a literal string
# On failure, reports through azure_log_error and exits the shell with 1.
check_elf_needed() {
    local binary_path=$1
    local needed=$2

    if [ ! -f "$binary_path" ]; then
        azure_log_error "Missing binary $binary_path"
        exit 1
    fi

    # -F: library names contain dots, which must not act as regex wildcards
    if ! readelf -d "$binary_path" | grep -qF "Shared library: [$needed]"; then
        azure_log_error "Binary $binary_path is not linked to $needed"
        exit 1
    fi
}

# Verify that a development (linker) symlink exists and that its target
# matches the given pattern.
#
# Arguments:
#   $1 - path that must be a symbolic link
#   $2 - grep basic regular expression matched against the link target
# On failure, reports through azure_log_error and exits the shell with 1.
check_linker_symlink() {
    local link_path=$1
    local target_pattern=$2

    if [ ! -L "$link_path" ]; then
        azure_log_error "Missing linker symlink $link_path"
        exit 1
    fi

    # "--" keeps a pattern that begins with '-' from being read as an option
    if ! readlink "$link_path" | grep -q -- "$target_pattern"; then
        azure_log_error "Linker symlink $link_path does not point to $target_pattern"
        exit 1
    fi
}

# Verify the SONAME and dynamic dependencies of one suffixed UCT module
# installed under ${ucx_inst}/lib/ucx.
#
# Globals:   ucx_inst (read)
# Arguments:
#   $1    - module name, e.g. "cma" (file is libuct_<module>-<suffix>.so.0.0.0)
#   $2    - SONAME suffix the module was built with
#   $3... - library names the module must be linked against
# Failures are reported by check_elf_soname / check_elf_needed.
check_uct_module_linkage() {
    # Locals keep this helper from overwriting the caller's own
    # "module", "suffix" and "needed" variables.
    local module=$1
    local suffix=$2
    local module_path="${ucx_inst}/lib/ucx/libuct_${module}-${suffix}.so.0.0.0"
    local needed
    shift 2

    check_elf_soname "$module_path" "libuct_${module}-${suffix}.so.0"
    for needed in "$@"; do
        check_elf_needed "$module_path" "$needed"
    done
}

# Assert that a file contains the given text as a literal (non-regex) string.
#
# Arguments:
#   $1 - text that must appear in the file
#   $2 - file to search
# The matching line is printed so it stays visible in the build log. On
# failure, reports through azure_log_error and exits the shell with 1.
soname_require_text() {
    local text=$1
    local file=$2

    if ! grep -F -- "$text" "$file"; then
        azure_log_error "Expected '$text' in $file"
        exit 1
    fi
}

# Build UCX twice -- once without and once with a SONAME suffix plus module
# deepbind -- and verify that the suffixed build is named and linked
# consistently.
#
# Globals read: ucx_build_dir, ucx_inst, WORKSPACE, MAKE, MAKEP,
#               CUDA_MODULE, GDRCOPY_MODULE, LD_LIBRARY_PATH
# Environment:  soname_suffix_check_hw - "yes" additionally requires a GPU and
#               IB devices and builds/checks the CUDA, IB and RDMACM modules
#               (defaults to "no")
# Runs in the current directory, which is expected to be the build directory
# of the suffixed configuration (config.h and test/gtest are read from it).
# Any failed check exits the shell with a non-zero status.
build_soname_suffix() {
    local suffix=ci
    local foreign_build_dir=${ucx_build_dir}/foreign
    local foreign_inst=${ucx_build_dir}/foreign-install
    local cuda_local_dir="/usr/local/cuda"
    local have_gdrcopy=no
    local lib module
    local -a common_soname_config_args=(
        --without-java
        --without-go
        --without-rocm
        --without-xpmem
        --without-knem
        --disable-doxygen-doc
    )

    soname_suffix_check_hw=${soname_suffix_check_hw:-no}

    if [ "${soname_suffix_check_hw}" = "yes" ]; then
        echo "==== Enable CUDA and IB for SONAME suffix build ===="

        if ! nvidia-smi -L; then
            azure_log_error "SONAME suffix CUDA/IB check requires a GPU"
            exit 1
        fi

        if [ ! -d /dev/infiniband ]; then
            azure_log_error "SONAME suffix CUDA/IB check requires IB devices"
            exit 1
        fi

        # Prefer a local CUDA installation that ships libcudart 12 or newer;
        # otherwise fall back to the CUDA environment module.
        if [ -d "$cuda_local_dir" ] &&
           find "$cuda_local_dir" -name 'libcudart.so.1[2-9]*' | grep -q .; then
            common_soname_config_args+=(--with-cuda="$cuda_local_dir")
        elif az_module_load $CUDA_MODULE; then
            common_soname_config_args+=(--with-cuda)
        else
            azure_log_error "SONAME suffix CUDA/IB check requires CUDA"
            exit 1
        fi

        # gdrcopy is optional: enabled only when the device node is writable
        # and its module loads.
        if [ -w "/dev/gdrdrv" ] && az_module_load $GDRCOPY_MODULE; then
            have_gdrcopy=yes
            common_soname_config_args+=(--with-gdrcopy)
        else
            common_soname_config_args+=(--without-gdrcopy)
        fi

        common_soname_config_args+=(--with-verbs --with-rdmacm)
    else
        common_soname_config_args+=(
            --without-verbs
            --without-rdmacm
            --without-cuda
        )
    fi

    echo "==== Build foreign UCX without SONAME suffix ===="
    mkdir -p "$foreign_build_dir"
    # Abort rather than configure the suffix-less build in the wrong directory
    pushd "$foreign_build_dir" || exit 1
    "${WORKSPACE}/contrib/configure-release" --prefix="$foreign_inst" \
        "${common_soname_config_args[@]}"
    $MAKEP
    $MAKEP install
    popd || exit 1

    echo "==== Build with SONAME suffix and module deepbind ===="
    "${WORKSPACE}/contrib/configure-release" --prefix="$ucx_inst" \
        --enable-gtest \
        --enable-test-apps \
        --with-soname-suffix="$suffix" \
        --enable-module-deepbind \
        "${common_soname_config_args[@]}"
    $MAKEP
    $MAKEP install

    # Generated configuration, pkg-config and CMake files must carry the suffix
    soname_require_text "#define UCX_MODULE_FILE_SUFFIX \"-$suffix\"" config.h
    soname_require_text "#define UCX_MODULE_DLOPEN_DEEPBIND 1" config.h
    soname_require_text " -lucp-${suffix}" "${ucx_inst}/lib/pkgconfig/ucx.pc"
    soname_require_text " -lucs-${suffix} -lucm-${suffix}" \
        "${ucx_inst}/lib/pkgconfig/ucx-ucs.pc"
    soname_require_text " -luct-${suffix}" "${ucx_inst}/lib/pkgconfig/ucx-uct.pc"
    for lib in ucs ucp uct; do
        soname_require_text "lib${lib}-${suffix}.so" \
            "${ucx_inst}/lib/cmake/ucx/ucx-targets.cmake"
    done
    if [ "${soname_suffix_check_hw}" = "yes" ]; then
        soname_require_text "#define HAVE_CUDA 1" config.h
        soname_require_text "#define HAVE_IB 1" config.h
    fi

    # Core libraries: suffixed SONAME, and an unsuffixed lib<name>.so linker
    # symlink that points at the suffixed file.
    for lib in ucm ucs uct ucp; do
        check_elf_soname \
            "${ucx_inst}/lib/lib${lib}-${suffix}.so.0.0.0" \
            "lib${lib}-${suffix}.so.0"
        check_linker_symlink \
            "${ucx_inst}/lib/lib${lib}.so" \
            "lib${lib}-${suffix}\\.so"
    done

    # Transport modules must be suffixed and linked to the suffixed core libs
    check_uct_module_linkage cma "$suffix" \
        "libuct-${suffix}.so.0" \
        "libucs-${suffix}.so.0"
    if [ "${soname_suffix_check_hw}" = "yes" ]; then
        for module in cuda ib rdmacm; do
            check_uct_module_linkage "$module" "$suffix" \
                "libuct-${suffix}.so.0" \
                "libucs-${suffix}.so.0"
        done
        if [ "${have_gdrcopy}" = "yes" ]; then
            check_uct_module_linkage cuda_gdrcopy "$suffix" \
                "libuct_cuda-${suffix}.so.0"
        fi
    fi

    check_elf_soname \
        "${ucx_build_dir}/test/gtest/ucs/test_module/.libs/libtest_module-${suffix}.so.0.0.0" \
        "libtest_module-${suffix}.so.0"
    for lib in uct ucs; do
        check_elf_needed \
            "${ucx_inst}/lib/libucp-${suffix}.so.0.0.0" \
            "lib${lib}-${suffix}.so.0"
    done
    for lib in ucp uct ucs; do
        check_elf_needed \
            "${ucx_inst}/bin/ucx_info" \
            "lib${lib}-${suffix}.so.0"
    done
    check_elf_needed \
        "${ucx_build_dir}/test/apps/.libs/libtest_ucx_isolation_plugin.so" \
        "libucp-${suffix}.so.0"

    # Run the dlopen isolation test app with both install trees on the
    # library search path.
    LD_LIBRARY_PATH="${ucx_inst}/lib:${foreign_inst}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" \
        "${ucx_build_dir}/test/apps/test_ucx_dlopen_isolation" \
        "${foreign_inst}/lib/libucp.so.0.0.0" \
        "${ucx_build_dir}/test/apps/.libs/libtest_ucx_isolation_plugin.so" \
        "$suffix" deepbind

    GTEST_FILTER=test_sys.module_file_suffix:test_sys.module \
        $MAKE -C test/gtest test
}
6 changes: 3 additions & 3 deletions cmake/ucx-targets.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ if(NOT TARGET ucx::ucs)
add_library(ucx::ucs SHARED IMPORTED)

set_target_properties(ucx::ucs PROPERTIES
IMPORTED_LOCATION "@libdir@/libucs.so"
IMPORTED_LOCATION "@libdir@/libucs@UCX_LIBRARY_FILE_SUFFIX@.so"
INTERFACE_INCLUDE_DIRECTORIES "@includedir@"
)
endif()
Expand All @@ -20,7 +20,7 @@ if(NOT TARGET ucx::ucp)
add_library(ucx::ucp SHARED IMPORTED)

set_target_properties(ucx::ucp PROPERTIES
IMPORTED_LOCATION "@libdir@/libucp.so"
IMPORTED_LOCATION "@libdir@/libucp@UCX_LIBRARY_FILE_SUFFIX@.so"
INTERFACE_INCLUDE_DIRECTORIES "@includedir@"
)
endif()
Expand All @@ -29,7 +29,7 @@ if(NOT TARGET ucx::uct)
add_library(ucx::uct SHARED IMPORTED)

set_target_properties(ucx::uct PROPERTIES
IMPORTED_LOCATION "@libdir@/libuct.so"
IMPORTED_LOCATION "@libdir@/libuct@UCX_LIBRARY_FILE_SUFFIX@.so"
INTERFACE_INCLUDE_DIRECTORIES "@includedir@"
)
endif()
47 changes: 47 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,43 @@ AC_SUBST(EXTRA_VERSION)
AC_SUBST(SCM_VERSION)
AC_SUBST(SOVERSION)

UCX_LT_RELEASE=
UCX_LIBRARY_FILE_SUFFIX=
ucx_soname_suffix_summary="<disabled>"
ucx_module_deepbind_summary="no"
AC_ARG_WITH([soname-suffix],
AS_HELP_STRING([--with-soname-suffix=SUFFIX],
[Append SUFFIX to UCX installed shared library names and SONAMEs. Disabled by default. [default=NO]]),
[], [with_soname_suffix=no])
AS_IF([test "x$with_soname_suffix" != xno],
[AS_IF([test "x$with_soname_suffix" = xyes],
[AC_MSG_ERROR([--with-soname-suffix requires an explicit suffix value])])
case "$with_soname_suffix" in
@<:@A-Za-z0-9@:>@*) ;;
*) AC_MSG_ERROR([--with-soname-suffix must start with a letter or digit]) ;;
esac
case "$with_soname_suffix" in
*@<:@!A-Za-z0-9_-@:>@*)
AC_MSG_ERROR([--with-soname-suffix must contain only letters, digits, underscores, and dashes])
;;
esac
UCX_LIBRARY_FILE_SUFFIX="-$with_soname_suffix"
UCX_LT_RELEASE="-release $with_soname_suffix"
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚨 BLOCKER

libtool's -release changes the installed library filename (e.g. libucp-SUFFIX.so.0), not just the SONAME. With this set, ucx.pc (which emits -lucp) and cmake/ucx-targets.cmake.in (which hardcodes libucp.so/libucs.so/libuct.so) will fail to locate the libs for downstream consumers. Either also template these files with the suffix, or use a mechanism that only alters the SONAME (e.g., -Wl,-soname=...) if the intent really is SONAME-only as the option name suggests.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ℹ️ INFO

Only the four core libs (ucm/ucp/ucs/uct) get $(UCX_LT_RELEASE); the per-transport module libs (e.g. src/uct/ib/Makefile.am, src/ucm/cuda/Makefile.am, …) still use plain -version-info $(SOVERSION). Is the asymmetry intentional? If two UCX trees with different suffixes are installed in the same prefix, the modules will collide.

AC_DEFINE_UNQUOTED([UCX_MODULE_FILE_SUFFIX],
["$UCX_LIBRARY_FILE_SUFFIX"],
[Suffix appended to private UCX module file names])
ucx_soname_suffix_summary="$with_soname_suffix"])
AC_SUBST([UCX_LT_RELEASE])
AC_SUBST([UCX_LIBRARY_FILE_SUFFIX])

AC_ARG_ENABLE([module-deepbind],
AS_HELP_STRING([--enable-module-deepbind],
[Load UCX modules with RTLD_DEEPBIND. Intended for private UCX bundles. Disabled by default. [default=NO]]),
[], [enable_module_deepbind=no])
AS_IF([test "x$enable_module_deepbind" = xyes],
[AS_IF([test "x$with_soname_suffix" = xno],
[AC_MSG_ERROR([--enable-module-deepbind requires --with-soname-suffix])])])

AC_PROG_CC
AC_PROG_CXX
AC_OPENMP
Expand All @@ -87,6 +124,14 @@ AC_FUNC_STRERROR_R

AC_PATH_TOOL([PKG_CONFIG], [pkg-config], [pkg-config])

AS_IF([test "x$enable_module_deepbind" = xyes],
[AC_CHECK_DECLS([RTLD_DEEPBIND],
[AC_DEFINE([UCX_MODULE_DLOPEN_DEEPBIND], [1],
[Load UCX modules with RTLD_DEEPBIND])
ucx_module_deepbind_summary="yes"],
[AC_MSG_ERROR([--enable-module-deepbind requires RTLD_DEEPBIND support from <dlfcn.h>])],
[[#include <dlfcn.h>]])])
Comment thread
roiedanino marked this conversation as resolved.


#
# Define SHARED_LIB preprocessor macro when building a shared library
Expand Down Expand Up @@ -435,6 +480,8 @@ AC_MSG_NOTICE([ Multi-thread: ${mt_enable}])
AC_MSG_NOTICE([ MPI tests: ${mpi_enable}])
AC_MSG_NOTICE([ VFS support: ${vfs_enable}])
AC_MSG_NOTICE([ Devel headers: ${enable_devel_headers}])
AC_MSG_NOTICE([ SONAME suffix: ${ucx_soname_suffix_summary}])
AC_MSG_NOTICE([ Module deepbind: ${ucx_module_deepbind_summary}])
AC_MSG_NOTICE([io_demo CUDA support: ${with_iodemo_cuda}])
AC_MSG_NOTICE([ Bindings: <$(echo ${build_bindings}|tr ':' ' ') >])
AC_MSG_NOTICE([ UCS modules: <$(echo ${ucs_modules}|tr ':' ' ') >])
Expand Down
3 changes: 2 additions & 1 deletion src/tools/perf/cuda/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ if HAVE_CUDA
module_LTLIBRARIES = libucx_perftest_cuda.la
libucx_perftest_cuda_la_CPPFLAGS = $(BASE_CPPFLAGS) $(CUDA_CPPFLAGS)
libucx_perftest_cuda_la_CFLAGS = $(BASE_CFLAGS) $(CUDA_CFLAGS) $(LT_CFLAGS)
libucx_perftest_cuda_la_LDFLAGS = $(CUDA_LDFLAGS) -version-info $(SOVERSION)
libucx_perftest_cuda_la_LDFLAGS = $(CUDA_LDFLAGS) -version-info $(SOVERSION) \
$(UCX_LT_RELEASE)
libucx_perftest_cuda_la_LIBADD = $(CUDART_LIBS)
libucx_perftest_cuda_la_SOURCES = cuda_alloc.c

Expand Down
3 changes: 2 additions & 1 deletion src/tools/perf/mad/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ module_LTLIBRARIES = libucx_perftest_mad.la
libucx_perftest_mad_la_CPPFLAGS = $(BASE_CPPFLAGS)
libucx_perftest_mad_la_CFLAGS = $(BASE_CFLAGS) $(MAD_CFLAGS) \
$(OPENMP_CFLAGS) $(LT_CFLAGS)
libucx_perftest_mad_la_LDFLAGS = $(MAD_LDFLAGS) -version-info $(SOVERSION)
libucx_perftest_mad_la_LDFLAGS = $(MAD_LDFLAGS) -version-info $(SOVERSION) \
$(UCX_LT_RELEASE)
libucx_perftest_mad_la_LIBADD = $(MAD_LIBS)
libucx_perftest_mad_la_SOURCES = perftest_mad.c

Expand Down
1 change: 1 addition & 0 deletions src/tools/perf/rocm/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ libucx_perftest_rocm_la_CPPFLAGS = $(BASE_CPPFLAGS) $(HIP_CPPFLAGS)
libucx_perftest_rocm_la_CFLAGS = $(BASE_CFLAGS) $(HIP_CFLAGS) \
$(LT_CFLAGS)
libucx_perftest_rocm_la_LDFLAGS = $(HIP_LDFLAGS) $(HIP_LIBS) -version-info $(SOVERSION) \
$(UCX_LT_RELEASE) \
$(patsubst %, -Xlinker %, -L$(ROCM_ROOT)/lib -rpath $(ROCM_ROOT)/hip/lib -rpath $(ROCM_ROOT)/lib) \
$(patsubst %, -Xlinker %, --enable-new-dtags) \
$(patsubst %, -Xlinker %, -rpath $(ROCM_ROOT)/lib64)
Expand Down
Loading
Loading