diff --git a/src/uct/cuda/base/cuda_ctx.c b/src/uct/cuda/base/cuda_ctx.c index 1a763a31401..a4125a14887 100644 --- a/src/uct/cuda/base/cuda_ctx.c +++ b/src/uct/cuda/base/cuda_ctx.c @@ -20,8 +20,8 @@ ucs_status_t uct_cuda_ctx_primary_retain(CUdevice cuda_device, int force, CUcontext cuda_ctx; if (!force) { - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuDevicePrimaryCtxGetState(cuda_device, &flags, &active)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuDevicePrimaryCtxGetState, + cuda_device, &flags, &active); if (status != UCS_OK) { return status; } @@ -33,8 +33,8 @@ ucs_status_t uct_cuda_ctx_primary_retain(CUdevice cuda_device, int force, } } - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuDevicePrimaryCtxRetain(&cuda_ctx, cuda_device)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuDevicePrimaryCtxRetain, &cuda_ctx, + cuda_device); if (status != UCS_OK) { return status; } @@ -49,14 +49,14 @@ ucs_status_t uct_cuda_ctx_primary_push_first_active(CUdevice *cuda_device_p) ucs_status_t status; CUdevice cuda_device; - status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetCount(&num_devices)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetCount, &num_devices); if (status != UCS_OK) { return status; } for (device_index = 0; device_index < num_devices; ++device_index) { - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuDeviceGet(&cuda_device, device_index)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet, &cuda_device, + device_index); if (status != UCS_OK) { return status; } @@ -85,9 +85,9 @@ ucs_status_t uct_cuda_ctx_primary_push(CUdevice cuda_device, int retain_inactive return status; } - status = UCT_CUDADRV_FUNC(cuCtxPushCurrent(primary_ctx), log_level); + status = UCT_CUDADRV_FUNC(log_level, cuCtxPushCurrent, primary_ctx); if (status != UCS_OK) { - (void)UCT_CUDADRV_FUNC(cuDevicePrimaryCtxRelease(cuda_device), log_level); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, cuda_device); } return status; @@ -102,7 +102,7 @@ ucs_status_t uct_cuda_ctx_primary_push_avail(int retain_inactive, ucs_status_t status; int dev_ordinal, num_devices; - status = UCT_CUDADRV_FUNC_LOG_DEBUG(cuCtxGetDevice(cuda_device_p)); + status = UCT_CUDADRV_FUNC_LOG_DEBUG(cuCtxGetDevice, cuda_device_p); if (status != UCS_OK) { *cuda_device_p = CU_DEVICE_INVALID; } @@ -137,15 +137,15 @@ ucs_status_t uct_cuda_ctx_primary_push_avail(int retain_inactive, return UCS_OK; } - status = UCT_CUDADRV_FUNC(cuDeviceGetCount(&num_devices), UCS_LOG_LEVEL_DIAG); + status = UCT_CUDADRV_FUNC_LOG_DIAG(cuDeviceGetCount, &num_devices); if (status != UCS_OK) { return UCS_ERR_INVALID_PARAM; } /* Use the first active cuda device for allocation */ for (dev_ordinal = 0; dev_ordinal < num_devices; dev_ordinal++) { - if (UCT_CUDADRV_FUNC_LOG_DEBUG(cuDeviceGet(avail_cuda_device_p, - dev_ordinal)) != UCS_OK) { + if (UCT_CUDADRV_FUNC_LOG_DEBUG(cuDeviceGet, avail_cuda_device_p, + dev_ordinal) != UCS_OK) { continue; } @@ -162,6 +162,6 @@ ucs_status_t uct_cuda_ctx_primary_push_avail(int retain_inactive, void uct_cuda_ctx_primary_pop_and_release(CUdevice cuda_device) { - (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL)); - (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(cuda_device)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, cuda_device); } diff --git a/src/uct/cuda/base/cuda_ctx.inl b/src/uct/cuda/base/cuda_ctx.inl index 694c2580e1e..c3995fc105d 100644 --- a/src/uct/cuda/base/cuda_ctx.inl +++ b/src/uct/cuda/base/cuda_ctx.inl @@ -38,12 +38,12 @@ uct_cuda_ctx_pop_and_release(CUdevice cuda_device, CUcontext cuda_context) return; } - UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL); if (cuda_device == CU_DEVICE_INVALID) { return; } - UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(cuda_device)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, cuda_device); } #endif diff --git a/src/uct/cuda/base/cuda_iface.c b/src/uct/cuda/base/cuda_iface.c index 6586efbe536..653078f3702 100644 --- a/src/uct/cuda/base/cuda_iface.c +++ b/src/uct/cuda/base/cuda_iface.c @@ -1,5 +1,5 @@ /** - * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2019. ALL RIGHTS RESERVED. + * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2019-2026. ALL RIGHTS RESERVED. * * See file LICENSE for terms. */ @@ -28,7 +28,7 @@ uct_cuda_base_query_devices_common( ucs_status_t status; if (uct_cuda_ctx_is_active()) { - status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxGetDevice(&cuda_device)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxGetDevice, &cuda_device); if (status != UCS_OK) { return status; } @@ -141,15 +141,13 @@ ucs_status_t uct_cuda_base_iface_event_fd_arm(uct_iface_h tl_iface, if (!ucs_queue_is_empty(event_q)) { status = #if (__CUDACC_VER_MAJOR__ >= 100000) - UCT_CUDADRV_FUNC_LOG_ERR( - cuLaunchHostFunc(*stream, - uct_cuda_base_iface_stream_cb_fxn, - iface)); + UCT_CUDADRV_FUNC_LOG_ERR(cuLaunchHostFunc, *stream, + uct_cuda_base_iface_stream_cb_fxn, + iface); #else - UCT_CUDADRV_FUNC_LOG_ERR( - cuStreamAddCallback(*stream, - uct_cuda_base_iface_stream_cb_fxn, - iface, 0)); + UCT_CUDADRV_FUNC_LOG_ERR(cuStreamAddCallback, *stream, + uct_cuda_base_iface_stream_cb_fxn, + iface, 0); #endif if (UCS_OK != status) { return status; @@ -318,7 +316,7 @@ ucs_status_t uct_cuda_base_iface_flush(uct_iface_h tl_iface, unsigned flags, void uct_cuda_base_stream_destroy(CUstream *stream) { if (*stream != NULL) { - (void)UCT_CUDADRV_FUNC_LOG_WARN(cuStreamDestroy(*stream)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuStreamDestroy, *stream); } } @@ -327,15 +325,15 @@ uct_cuda_base_event_desc_init(ucs_mpool_t *mp, void *obj, void *chunk) { uct_cuda_event_desc_t *event_desc = obj; - UCT_CUDADRV_FUNC_LOG_ERR(cuEventCreate(&event_desc->event, - CU_EVENT_DISABLE_TIMING)); + UCT_CUDADRV_FUNC_LOG_ERR(cuEventCreate, &event_desc->event, + CU_EVENT_DISABLE_TIMING); } static void uct_cuda_base_event_desc_cleanup(ucs_mpool_t *mp, void *obj) { uct_cuda_event_desc_t *event_desc = obj; - (void)UCT_CUDADRV_FUNC_LOG_WARN(cuEventDestroy(event_desc->event)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuEventDestroy, event_desc->event); } void uct_cuda_base_queue_desc_init(uct_cuda_queue_desc_t *qdesc) @@ -375,7 +373,7 @@ ucs_status_t uct_cuda_base_ctx_rsc_create(uct_cuda_iface_t *iface, uct_cuda_ctx_rsc_t *ctx_rsc; ucs_mpool_params_t mp_params; - status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxGetCurrent(&ctx)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxGetCurrent, &ctx); if (status != UCS_OK) { return status; } else if (ctx == NULL) { diff --git a/src/uct/cuda/base/cuda_iface.h b/src/uct/cuda/base/cuda_iface.h index b17ddfc55f6..214835c59ca 100644 --- a/src/uct/cuda/base/cuda_iface.h +++ b/src/uct/cuda/base/cuda_iface.h @@ -1,5 +1,5 @@ /** - * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2018. ALL RIGHTS RESERVED. + * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2018-2026. ALL RIGHTS RESERVED. * See file LICENSE for terms. */ @@ -166,8 +166,8 @@ uct_cuda_base_init_stream(CUstream *stream) return UCS_OK; } - return UCT_CUDADRV_FUNC_LOG_ERR( - cuStreamCreate(stream, CU_STREAM_NON_BLOCKING)); + return UCT_CUDADRV_FUNC_LOG_ERR(cuStreamCreate, stream, + CU_STREAM_NON_BLOCKING); } #endif diff --git a/src/uct/cuda/base/cuda_md.c b/src/uct/cuda/base/cuda_md.c index c0671ac45a1..2aca70b45f5 100644 --- a/src/uct/cuda/base/cuda_md.c +++ b/src/uct/cuda/base/cuda_md.c @@ -1,5 +1,5 @@ /** - * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2018-2019. ALL RIGHTS RESERVED. + * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2018-2026. ALL RIGHTS RESERVED. * See file LICENSE for terms. */ @@ -27,14 +27,13 @@ uct_cuda_base_query_md_resources(uct_component_t *component, char device_name[10]; int i, num_gpus; - status = UCT_CUDADRV_FUNC(cuDeviceGetCount(&num_gpus), UCS_LOG_LEVEL_DIAG); + status = UCT_CUDADRV_FUNC_LOG_DIAG(cuDeviceGetCount, &num_gpus); if ((status != UCS_OK) || (num_gpus == 0)) { return uct_md_query_empty_md_resource(resources_p, num_resources_p); } for (i = 0; i < num_gpus; ++i) { - status = UCT_CUDADRV_FUNC(cuDeviceGet(&cuda_device, i), - UCS_LOG_LEVEL_DIAG); + status = UCT_CUDADRV_FUNC_LOG_DIAG(cuDeviceGet, &cuda_device, i); if (status != UCS_OK) { continue; } @@ -57,7 +56,7 @@ uct_cuda_base_query_md_resources(uct_component_t *component, UCS_STATIC_INIT { - UCT_CUDADRV_FUNC_LOG_DEBUG(cuInit(0)); + UCT_CUDADRV_FUNC_LOG_DEBUG(cuInit, 0); } UCS_STATIC_CLEANUP diff --git a/src/uct/cuda/base/cuda_util.h b/src/uct/cuda/base/cuda_util.h index 804c2ae1d70..9300b890eb2 100644 --- a/src/uct/cuda/base/cuda_util.h +++ b/src/uct/cuda/base/cuda_util.h @@ -7,8 +7,9 @@ #ifndef UCT_CUDA_UTIL_H #define UCT_CUDA_UTIL_H -#include #include +#include +#include #include @@ -16,35 +17,39 @@ const char *uct_cuda_cu_get_error_string(CUresult result); -#define UCT_CUDADRV_LOG(_func, _log_level, _result) \ +#define UCT_CUDADRV_LOG(_log_level, _func, _result) \ ucs_log((_log_level), "%s failed: %s", UCS_PP_MAKE_STRING(_func), \ uct_cuda_cu_get_error_string(_result)) -#define UCT_CUDADRV_FUNC(_func, _log_level) \ +#define UCT_CUDADRV_FUNC(_log_level, _func, ...) \ ({ \ - CUresult _result = (_func); \ + CUresult _result = UCS_PROFILE_CALL_ALWAYS(_func, ##__VA_ARGS__); \ ucs_status_t _status; \ if (ucs_likely(_result == CUDA_SUCCESS)) { \ _status = UCS_OK; \ } else { \ - UCT_CUDADRV_LOG(_func, _log_level, _result); \ + UCT_CUDADRV_LOG(_log_level, _func, _result); \ _status = UCS_ERR_IO_ERROR; \ } \ _status; \ }) -#define UCT_CUDADRV_FUNC_LOG_ERR(_func) \ - UCT_CUDADRV_FUNC(_func, UCS_LOG_LEVEL_ERROR) +#define UCT_CUDADRV_FUNC_LOG_ERR(_func, ...) \ + UCT_CUDADRV_FUNC(UCS_LOG_LEVEL_ERROR, _func, ##__VA_ARGS__) + + +#define UCT_CUDADRV_FUNC_LOG_WARN(_func, ...) \ + UCT_CUDADRV_FUNC(UCS_LOG_LEVEL_WARN, _func, ##__VA_ARGS__) -#define UCT_CUDADRV_FUNC_LOG_WARN(_func) \ - UCT_CUDADRV_FUNC(_func, UCS_LOG_LEVEL_WARN) +#define UCT_CUDADRV_FUNC_LOG_DIAG(_func, ...) \ + UCT_CUDADRV_FUNC(UCS_LOG_LEVEL_DIAG, _func, ##__VA_ARGS__) -#define UCT_CUDADRV_FUNC_LOG_DEBUG(_func) \ - UCT_CUDADRV_FUNC(_func, UCS_LOG_LEVEL_DEBUG) +#define UCT_CUDADRV_FUNC_LOG_DEBUG(_func, ...) \ + UCT_CUDADRV_FUNC(UCS_LOG_LEVEL_DEBUG, _func, ##__VA_ARGS__) /** diff --git a/src/uct/cuda/cuda_copy/cuda_copy_ep.c b/src/uct/cuda/cuda_copy/cuda_copy_ep.c index e7545ef7b69..8599eef1285 100644 --- a/src/uct/cuda/cuda_copy/cuda_copy_ep.c +++ b/src/uct/cuda/cuda_copy/cuda_copy_ep.c @@ -1,5 +1,5 @@ /** - * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2017-2019. ALL RIGHTS RESERVED. + * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2017-2026. ALL RIGHTS RESERVED. * See file LICENSE for terms. */ @@ -142,9 +142,9 @@ static ucs_status_t uct_cuda_copy_ep_push_memory_ctx(CUdeviceptr cuda_deviceptr, CUcontext cuda_context; ucs_status_t status; - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuPointerGetAttribute(&cuda_context, CU_POINTER_ATTRIBUTE_CONTEXT, - cuda_deviceptr)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuPointerGetAttribute, &cuda_context, + CU_POINTER_ATTRIBUTE_CONTEXT, + cuda_deviceptr); if (status != UCS_OK) { return status; } @@ -154,7 +154,7 @@ static ucs_status_t uct_cuda_copy_ep_push_memory_ctx(CUdeviceptr cuda_deviceptr, return UCS_ERR_UNSUPPORTED; } - status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent(cuda_context)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent, cuda_context); if (status != UCS_OK) { return status; } @@ -230,7 +230,7 @@ static UCS_F_ALWAYS_INLINE ucs_status_t uct_cuda_copy_ctx_rsc_get( result = uct_cuda_ctx_get_id(NULL, &ctx_id); if (result != CUDA_SUCCESS) { - UCT_CUDADRV_LOG(cuCtxGetId, UCS_LOG_LEVEL_ERROR, result); + UCT_CUDADRV_LOG(UCS_LOG_LEVEL_ERROR, cuCtxGetId, result); status = UCS_ERR_IO_ERROR; goto err_pop_and_release; } @@ -323,14 +323,14 @@ uct_cuda_copy_post_cuda_async_copy(uct_ep_h tl_ep, void *dst, void *src, goto out_pop_and_release; } - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuMemcpyAsync((CUdeviceptr)dst, (CUdeviceptr)src, length, *stream)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemcpyAsync, (CUdeviceptr)dst, + (CUdeviceptr)src, length, *stream); if (ucs_unlikely(UCS_OK != status)) { goto err_mpool_put; } - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuEventRecord(cuda_event->event, *stream)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuEventRecord, cuda_event->event, + *stream); if (ucs_unlikely(UCS_OK != status)) { goto err_mpool_put; } @@ -422,12 +422,12 @@ static UCS_F_ALWAYS_INLINE ucs_status_t uct_cuda_copy_ep_rma_short( goto out_pop_and_release; } - status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemcpyAsync(dst, src, length, *stream)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemcpyAsync, dst, src, length, *stream); if (ucs_unlikely(status != UCS_OK)) { goto out_pop_and_release; } - status = UCT_CUDADRV_FUNC_LOG_ERR(cuStreamSynchronize(*stream)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuStreamSynchronize, *stream); out_pop_and_release: uct_cuda_ctx_pop_and_release(ctx.cuda_device, ctx.cuda_context); diff --git a/src/uct/cuda/cuda_copy/cuda_copy_md.c b/src/uct/cuda/cuda_copy/cuda_copy_md.c index 8735bc06715..ecde59ab966 100644 --- a/src/uct/cuda/cuda_copy/cuda_copy_md.c +++ b/src/uct/cuda/cuda_copy/cuda_copy_md.c @@ -1,5 +1,5 @@ /** - * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2017-2019. ALL RIGHTS RESERVED. + * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2017-2026. ALL RIGHTS RESERVED. * See file LICENSE for terms. */ @@ -93,16 +93,15 @@ int uct_cuda_copy_md_is_dmabuf_supported() int dmabuf_supported = 0; CUdevice cuda_device; - if (UCT_CUDADRV_FUNC_LOG_DEBUG(cuDeviceGet(&cuda_device, 0)) != UCS_OK) { + if (UCT_CUDADRV_FUNC_LOG_DEBUG(cuDeviceGet, &cuda_device, 0) != UCS_OK) { return 0; } /* Assume dmabuf support is uniform across all devices */ #if CUDA_VERSION >= 11070 - if (UCT_CUDADRV_FUNC_LOG_DEBUG( - cuDeviceGetAttribute(&dmabuf_supported, - CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED, - cuda_device)) != UCS_OK) { + if (UCT_CUDADRV_FUNC_LOG_DEBUG(cuDeviceGetAttribute, &dmabuf_supported, + CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED, + cuda_device) != UCS_OK) { return 0; } #endif @@ -165,9 +164,8 @@ UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_copy_mem_reg, log_level = (flags & UCT_MD_MEM_FLAG_HIDE_ERRORS) ? UCS_LOG_LEVEL_DEBUG : UCS_LOG_LEVEL_ERROR; - status = UCT_CUDADRV_FUNC(cuMemHostRegister(address, length, - CU_MEMHOSTREGISTER_PORTABLE), - log_level); + status = UCT_CUDADRV_FUNC(log_level, cuMemHostRegister, address, length, + CU_MEMHOSTREGISTER_PORTABLE); if (status != UCS_OK) { return status; } @@ -183,8 +181,7 @@ UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_copy_mem_dereg, UCT_MD_MEM_DEREG_CHECK_PARAMS(params, 0); if (params->memh != &uct_cuda_dummy_memh) { - UCT_CUDADRV_FUNC(cuMemHostUnregister((void*)params->memh), - UCS_LOG_LEVEL_DIAG); + UCT_CUDADRV_FUNC_LOG_DIAG(cuMemHostUnregister, (void*)params->memh); } return UCS_OK; @@ -217,9 +214,9 @@ uct_cuda_copy_mem_alloc_fabric(uct_cuda_copy_md_t *md, prop.allocFlags.gpuDirectRDMACapable = 1; if (md->granularity == SIZE_MAX) { - status = UCT_CUDADRV_FUNC(cuMemGetAllocationGranularity( - &md->granularity, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM), - log_level); + status = UCT_CUDADRV_FUNC(log_level, cuMemGetAllocationGranularity, + &md->granularity, &prop, + CU_MEM_ALLOC_GRANULARITY_MINIMUM); if (status != UCS_OK) { return status; } @@ -227,24 +224,23 @@ uct_cuda_copy_mem_alloc_fabric(uct_cuda_copy_md_t *md, alloc_handle->length = ucs_align_up(alloc_handle->length, md->granularity); - status = UCT_CUDADRV_FUNC(cuMemCreate(&alloc_handle->generic_handle, - alloc_handle->length, &prop, 0), - log_level); + status = UCT_CUDADRV_FUNC(log_level, cuMemCreate, + &alloc_handle->generic_handle, + alloc_handle->length, &prop, 0); if (status != UCS_OK) { return UCS_ERR_NO_MEMORY; } - status = UCT_CUDADRV_FUNC(cuMemAddressReserve( - &alloc_handle->ptr, alloc_handle->length, - md->granularity, 0, 0), - log_level); + status = UCT_CUDADRV_FUNC(log_level, cuMemAddressReserve, + &alloc_handle->ptr, alloc_handle->length, + md->granularity, 0, 0); if (status != UCS_OK) { goto err_mem_release; } - status = UCT_CUDADRV_FUNC(cuMemMap(alloc_handle->ptr, alloc_handle->length, - 0, alloc_handle->generic_handle, 0), - log_level); + status = UCT_CUDADRV_FUNC(log_level, cuMemMap, alloc_handle->ptr, + alloc_handle->length, 0, + alloc_handle->generic_handle, 0); if (status != UCS_OK) { goto err_address_free; } @@ -253,18 +249,15 @@ uct_cuda_copy_mem_alloc_fabric(uct_cuda_copy_md_t *md, access_desc.location.type = CU_MEM_LOCATION_TYPE_DEVICE; access_desc.location.id = cu_device; - status = UCT_CUDADRV_FUNC(cuMemSetAccess( - alloc_handle->ptr, alloc_handle->length, &access_desc, 1), - log_level); + status = UCT_CUDADRV_FUNC(log_level, cuMemSetAccess, alloc_handle->ptr, + alloc_handle->length, &access_desc, 1); if (status != UCS_OK) { goto err_mem_unmap; } - status = UCT_CUDADRV_FUNC( - cuPointerGetAttribute(&allowed_types, - CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES, - alloc_handle->ptr), - log_level); + status = UCT_CUDADRV_FUNC(log_level, cuPointerGetAttribute, &allowed_types, + CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES, + alloc_handle->ptr); if (status != UCS_OK) { goto err_mem_unmap; } else if (!(allowed_types & CU_MEM_HANDLE_TYPE_FABRIC)) { @@ -282,13 +275,13 @@ uct_cuda_copy_mem_alloc_fabric(uct_cuda_copy_md_t *md, return UCS_OK; err_mem_unmap: - UCT_CUDADRV_FUNC_LOG_DEBUG( - cuMemUnmap(alloc_handle->ptr, alloc_handle->length)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemUnmap, alloc_handle->ptr, + alloc_handle->length); err_address_free: - UCT_CUDADRV_FUNC_LOG_DEBUG( - cuMemAddressFree(alloc_handle->ptr, alloc_handle->length)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemAddressFree, alloc_handle->ptr, + alloc_handle->length); err_mem_release: - UCT_CUDADRV_FUNC_LOG_DEBUG(cuMemRelease(alloc_handle->generic_handle)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemRelease, alloc_handle->generic_handle); #endif return UCS_ERR_NO_MEMORY; } @@ -316,8 +309,8 @@ static ucs_status_t uct_cuda_copy_set_ctx_sync_memops(int log_level) if (cuda_cuCtxSetFlags_func != NULL) { /* Synchronize future DMA operations for all memory types */ - UCT_CUDADRV_FUNC(cuda_cuCtxSetFlags_func(CU_CTX_SYNC_MEMOPS), - log_level); + UCT_CUDADRV_FUNC(log_level, cuda_cuCtxSetFlags_func, + CU_CTX_SYNC_MEMOPS); return UCS_OK; } #endif @@ -339,9 +332,8 @@ static void uct_cuda_copy_sync_memops(CUdeviceptr dptr, int is_vmm) } /* Synchronize for DMA for legacy memory types */ - UCT_CUDADRV_FUNC_LOG_WARN( - cuPointerSetAttribute(&sync_memops_value, - CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, dptr)); + UCT_CUDADRV_FUNC_LOG_WARN(cuPointerSetAttribute, &sync_memops_value, + CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, dptr); } static ucs_status_t @@ -396,9 +388,8 @@ uct_cuda_copy_mem_alloc(uct_md_h uct_md, size_t *length_p, void **address_p, } if (md->config.enable_fabric != UCS_YES) { - status = UCT_CUDADRV_FUNC(cuMemAlloc(&alloc_handle->ptr, - alloc_handle->length), - log_level); + status = UCT_CUDADRV_FUNC(log_level, cuMemAlloc, &alloc_handle->ptr, + alloc_handle->length); if (status == UCS_OK) { goto allocated; } @@ -408,9 +399,9 @@ uct_cuda_copy_mem_alloc(uct_md_h uct_md, size_t *length_p, void **address_p, alloc_handle->length); status = UCS_ERR_NO_MEMORY; } else if (mem_type == UCS_MEMORY_TYPE_CUDA_MANAGED) { - status = UCT_CUDADRV_FUNC( - cuMemAllocManaged(&alloc_handle->ptr, alloc_handle->length, - CU_MEM_ATTACH_GLOBAL), log_level); + status = UCT_CUDADRV_FUNC(log_level, cuMemAllocManaged, + &alloc_handle->ptr, alloc_handle->length, + CU_MEM_ATTACH_GLOBAL); } else { ucs_log(log_level, "allocation mem_types supported: cuda, cuda-managed"); @@ -443,20 +434,20 @@ uct_cuda_copy_mem_release_fabric(uct_cuda_copy_alloc_handle_t *alloc_handle) #if HAVE_CUDA_FABRIC ucs_status_t status; - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuMemRelease(alloc_handle->generic_handle)); + status = UCT_CUDADRV_FUNC_LOG_WARN(cuMemRelease, + alloc_handle->generic_handle); if (status != UCS_OK) { return status; } - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuMemUnmap(alloc_handle->ptr, alloc_handle->length)); + status = UCT_CUDADRV_FUNC_LOG_WARN(cuMemUnmap, alloc_handle->ptr, + alloc_handle->length); if (status != UCS_OK) { return status; } - return UCT_CUDADRV_FUNC_LOG_ERR( - cuMemAddressFree(alloc_handle->ptr, alloc_handle->length)); + return UCT_CUDADRV_FUNC_LOG_WARN(cuMemAddressFree, alloc_handle->ptr, + alloc_handle->length); #else return UCS_ERR_UNSUPPORTED; #endif @@ -482,8 +473,8 @@ static int uct_cuda_copy_detect_vmm(const void *address, *vmm_mem_type = UCS_MEMORY_TYPE_UNKNOWN; *cuda_device = CU_DEVICE_INVALID; - status = UCT_CUDADRV_FUNC_LOG_DEBUG( - cuMemGetAllocationPropertiesFromHandle(&prop, alloc_handle)); + status = UCT_CUDADRV_FUNC_LOG_DEBUG(cuMemGetAllocationPropertiesFromHandle, + &prop, alloc_handle); if (status != UCS_OK) { goto out; } @@ -502,7 +493,7 @@ static int uct_cuda_copy_detect_vmm(const void *address, } out: - UCT_CUDADRV_FUNC_LOG_WARN(cuMemRelease(alloc_handle)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemRelease, alloc_handle); return 1; #else return 0; @@ -518,7 +509,7 @@ static ucs_status_t uct_cuda_copy_mem_free(uct_md_h md, uct_mem_h memh) if (alloc_handle->is_vmm) { status = uct_cuda_copy_mem_release_fabric(alloc_handle); } else { - UCT_CUDADRV_FUNC(cuMemFree(alloc_handle->ptr), UCS_LOG_LEVEL_DIAG); + (void)UCT_CUDADRV_FUNC_LOG_DIAG(cuMemFree, alloc_handle->ptr); status = UCS_OK; } @@ -544,13 +535,14 @@ static size_t uct_cuda_copy_md_get_total_device_mem(CUdevice cuda_device) pthread_mutex_lock(&lock); if (!total_bytes[cuda_device]) { - if (UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceTotalMem(&total_bytes[cuda_device], - cuda_device)) != UCS_OK) { + if (UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceTotalMem, + &total_bytes[cuda_device], + cuda_device) != UCS_OK) { goto err; } - if (UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetName(dev_name, sizeof(dev_name), - cuda_device)) != UCS_OK) { + if (UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetName, dev_name, + sizeof(dev_name), cuda_device) != UCS_OK) { goto err; } @@ -585,7 +577,7 @@ static void uct_cuda_copy_md_sync_memops_get_address_range( if (cuda_ctx == NULL) { status = uct_cuda_ctx_primary_push(cuda_device, 0, UCS_LOG_LEVEL_ERROR); } else { - status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent(cuda_ctx)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent, cuda_ctx); } if (status != UCS_OK) { return; @@ -600,9 +592,8 @@ static void uct_cuda_copy_md_sync_memops_get_address_range( goto out_ctx_pop; } - if (UCT_CUDADRV_FUNC_LOG_DEBUG( - cuMemGetAddressRange(&base_address, &alloc_length, address)) != - UCS_OK) { + if (UCT_CUDADRV_FUNC_LOG_DEBUG(cuMemGetAddressRange, &base_address, + &alloc_length, address) != UCS_OK) { goto out_ctx_pop; } @@ -622,9 +613,9 @@ static void uct_cuda_copy_md_sync_memops_get_address_range( mem_info->alloc_length = alloc_length; out_ctx_pop: - UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(&tmp_ctx)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, &tmp_ctx); if (cuda_ctx == NULL) { - UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(cuda_device)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, cuda_device); } } @@ -660,10 +651,10 @@ uct_cuda_copy_md_query_attributes(const uct_cuda_copy_md_t *md, attr_type[3] = CU_POINTER_ATTRIBUTE_CONTEXT; attr_data[3] = &cuda_mem_ctx; - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuPointerGetAttributes(ucs_static_array_size(attr_data), - attr_type, attr_data, - (CUdeviceptr)address)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuPointerGetAttributes, + ucs_static_array_size(attr_data), + attr_type, attr_data, + (CUdeviceptr)address); if (status != UCS_OK) { /* pointer not recognized */ return status; diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc.inl b/src/uct/cuda/cuda_ipc/cuda_ipc.inl index 0fa6c3beec6..171d0eaeeef 100644 --- a/src/uct/cuda/cuda_ipc/cuda_ipc.inl +++ b/src/uct/cuda/cuda_ipc/cuda_ipc.inl @@ -29,9 +29,9 @@ uct_cuda_ipc_check_and_push_ctx(CUdeviceptr address, CUdevice *cuda_device_p, attr_type[1] = CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL; attr_data[1] = &cuda_device_ordinal; - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuPointerGetAttributes(UCT_CUDA_IPC_NUM_ATTRS, attr_type, attr_data, - address)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuPointerGetAttributes, + UCT_CUDA_IPC_NUM_ATTRS, attr_type, + attr_data, address); if (ucs_unlikely(status != UCS_OK)) { return status; } @@ -39,8 +39,8 @@ uct_cuda_ipc_check_and_push_ctx(CUdeviceptr address, CUdevice *cuda_device_p, ucs_assertv(cuda_device_ordinal >= 0, "cuda_device_ordinal=%d", cuda_device_ordinal); - status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet(&cuda_device, - cuda_device_ordinal)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet, &cuda_device, + cuda_device_ordinal); if (ucs_unlikely(status != UCS_OK)) { return status; } @@ -51,16 +51,16 @@ uct_cuda_ipc_check_and_push_ctx(CUdeviceptr address, CUdevice *cuda_device_p, return status; } - UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(cuda_device)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, cuda_device); } - status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxGetCurrent(&cuda_ctx_current)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxGetCurrent, &cuda_ctx_current); if (ucs_unlikely(status != UCS_OK)) { return status; } if (cuda_ctx != cuda_ctx_current) { - status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent(cuda_ctx)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent, cuda_ctx); if (ucs_unlikely(status != UCS_OK)) { return status; } @@ -78,7 +78,7 @@ static UCS_F_ALWAYS_INLINE void uct_cuda_ipc_check_and_pop_ctx(int is_ctx_pushed) { if (is_ctx_pushed) { - UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL); } } diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c b/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c index f1484418abe..cb6f9dedbee 100644 --- a/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c +++ b/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c @@ -140,9 +140,9 @@ uct_cuda_ipc_primary_ctx_retain_and_push(CUdevice cuda_device) return status; } - status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent(cuda_ctx)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent, cuda_ctx); if (status != UCS_OK) { - UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(cuda_device)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, cuda_device); } return status; @@ -150,8 +150,8 @@ uct_cuda_ipc_primary_ctx_retain_and_push(CUdevice cuda_device) static void uct_cuda_ipc_primary_ctx_pop_and_release(CUdevice cuda_device) { - UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL)); - UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(cuda_device)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, cuda_device); } static ucs_status_t @@ -164,8 +164,8 @@ uct_cuda_ipc_close_memhandle_legacy(uct_cuda_ipc_cache_region_t *region) return status; } - status = UCT_CUDADRV_FUNC_LOG_WARN( - cuIpcCloseMemHandle((CUdeviceptr)region->mapped_addr)); + status = UCT_CUDADRV_FUNC_LOG_WARN(cuIpcCloseMemHandle, + (CUdeviceptr)region->mapped_addr); uct_cuda_ipc_primary_ctx_pop_and_release(region->cu_dev); return status; } @@ -176,17 +176,19 @@ static ucs_status_t uct_cuda_ipc_close_memhandle(uct_cuda_ipc_cache_region_t *re ucs_status_t status; if (region->key.ph.handle_type == UCT_CUDA_IPC_KEY_HANDLE_TYPE_VMM) { - status = UCT_CUDADRV_FUNC_LOG_WARN(cuMemUnmap( - (CUdeviceptr)region->mapped_addr, region->key.b_len)); + status = UCT_CUDADRV_FUNC_LOG_WARN(cuMemUnmap, + (CUdeviceptr)region->mapped_addr, + region->key.b_len); if (status != UCS_OK) { return status; } - return UCT_CUDADRV_FUNC_LOG_WARN(cuMemAddressFree( - (CUdeviceptr)region->mapped_addr, region->key.b_len)); + return UCT_CUDADRV_FUNC_LOG_WARN(cuMemAddressFree, + (CUdeviceptr)region->mapped_addr, + region->key.b_len); } else if (region->key.ph.handle_type == UCT_CUDA_IPC_KEY_HANDLE_TYPE_MEMPOOL) { - return UCT_CUDADRV_FUNC_LOG_WARN( - cuMemFree((CUdeviceptr)region->mapped_addr)); + return UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree, + (CUdeviceptr)region->mapped_addr); } #endif return uct_cuda_ipc_close_memhandle_legacy(region); @@ -281,8 +283,8 @@ uct_cuda_ipc_open_memhandle_legacy(CUipcMemHandle memh, CUdevice cu_dev, return status; } - cuerr = cuIpcOpenMemHandle(mapped_addr, memh, - CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS); + cuerr = UCS_PROFILE_CALL_ALWAYS(cuIpcOpenMemHandle, mapped_addr, memh, + CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS); if (cuerr != CUDA_SUCCESS) { ucs_log(log_level, "cuIpcOpenMemHandle() failed: %s", uct_cuda_cu_get_error_string(cuerr)); @@ -313,29 +315,29 @@ uct_cuda_ipc_open_memhandle_vmm(const uct_cuda_ipc_rkey_t *key, CUdevice cu_dev, CUdeviceptr dptr; CUmemGenericAllocationHandle handle; - status = UCT_CUDADRV_FUNC(cuMemImportFromShareableHandle(&handle, - (void*)&key->ph.handle.fabric_handle, - CU_MEM_HANDLE_TYPE_FABRIC), log_level); + status = UCT_CUDADRV_FUNC(log_level, cuMemImportFromShareableHandle, + &handle, (void*)&key->ph.handle.fabric_handle, + CU_MEM_HANDLE_TYPE_FABRIC); if (status != UCS_OK) { goto out; } - status = UCT_CUDADRV_FUNC(cuMemAddressReserve(&dptr, key->b_len, 0, 0, 0), - log_level); + status = UCT_CUDADRV_FUNC(log_level, cuMemAddressReserve, &dptr, key->b_len, + 0, 0, 0); if (status != UCS_OK) { goto release_handle; } - status = UCT_CUDADRV_FUNC(cuMemMap(dptr, key->b_len, 0, handle, 0), - log_level); + status = UCT_CUDADRV_FUNC(log_level, cuMemMap, dptr, key->b_len, 0, handle, + 0); if (status != UCS_OK) { goto release_va_range; } uct_cuda_ipc_init_access_desc(&access_desc, cu_dev); - status = UCT_CUDADRV_FUNC(cuMemSetAccess(dptr, key->b_len, &access_desc, 1), - log_level); + status = UCT_CUDADRV_FUNC(log_level, cuMemSetAccess, dptr, key->b_len, + &access_desc, 1); if (status != UCS_OK) { goto unmap_range; } @@ -344,11 +346,11 @@ uct_cuda_ipc_open_memhandle_vmm(const uct_cuda_ipc_rkey_t *key, CUdevice cu_dev, goto release_handle; unmap_range: - UCT_CUDADRV_FUNC_LOG_WARN(cuMemUnmap(dptr, key->b_len)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemUnmap, dptr, key->b_len); release_va_range: - UCT_CUDADRV_FUNC_LOG_WARN(cuMemAddressFree(dptr, key->b_len)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemAddressFree, dptr, key->b_len); release_handle: - UCT_CUDADRV_FUNC_LOG_WARN(cuMemRelease(handle)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemRelease, handle); out: return status; } @@ -362,23 +364,23 @@ static ucs_status_t cuda_ipc_rem_mpool_cache_create(uct_cuda_ipc_rkey_t *key, CUdeviceptr dptr; ucs_status_t status; - status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemPoolImportFromShareableHandle( - mpool, (void *)&key->ph.handle.fabric_handle, - CU_MEM_HANDLE_TYPE_FABRIC, 0)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemPoolImportFromShareableHandle, mpool, + (void*)&key->ph.handle.fabric_handle, + CU_MEM_HANDLE_TYPE_FABRIC, 0); if (status != UCS_OK) { goto err; } - status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemPoolImportPointer(&dptr, *mpool, - (CUmemPoolPtrExportData*)&key->ph.ptr)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemPoolImportPointer, &dptr, *mpool, + (CUmemPoolPtrExportData*)&key->ph.ptr); if (status != UCS_OK) { goto err_free_mpool; } uct_cuda_ipc_init_access_desc(&access_desc, cu_dev); - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuMemPoolSetAccess(*mpool, &access_desc, 1)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemPoolSetAccess, *mpool, &access_desc, + 1); if (status != UCS_OK) { goto err_free_ptr; } @@ -387,9 +389,9 @@ static ucs_status_t cuda_ipc_rem_mpool_cache_create(uct_cuda_ipc_rkey_t *key, return UCS_OK; err_free_ptr: - cuMemFree(dptr); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree, dptr); err_free_mpool: - cuMemPoolDestroy(*mpool); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemPoolDestroy, *mpool); err: return status; } @@ -441,8 +443,9 @@ uct_cuda_ipc_open_memhandle_mempool(uct_cuda_ipc_rkey_t *key, CUdevice cu_dev, } out_import_pointer: - status = UCT_CUDADRV_FUNC(cuMemPoolImportPointer(mapped_addr, key->ph.pool, - (CUmemPoolPtrExportData*)&key->ph.ptr), log_level); + status = UCT_CUDADRV_FUNC(log_level, cuMemPoolImportPointer, mapped_addr, + key->ph.pool, + (CUmemPoolPtrExportData*)&key->ph.ptr); err: pthread_rwlock_unlock(&uct_cuda_ipc_rem_mpool_cache.lock); @@ -610,7 +613,7 @@ UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_ipc_map_memhandle, CUuuid uuid; int ret; - status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetUuid(&uuid, cu_dev)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetUuid, &uuid, cu_dev); if (status != UCS_OK) { return status; } diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_ep.c b/src/uct/cuda/cuda_ipc/cuda_ipc_ep.c index e8b62347d1e..82b993e6876 100644 --- a/src/uct/cuda/cuda_ipc/cuda_ipc_ep.c +++ b/src/uct/cuda/cuda_ipc/cuda_ipc_ep.c @@ -1,11 +1,11 @@ /** - * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2018-2019. ALL RIGHTS RESERVED. + * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2018-2026. ALL RIGHTS RESERVED. * See file LICENSE for terms. */ - #ifdef HAVE_CONFIG_H - # include "config.h" - #endif +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif #include #include @@ -43,7 +43,8 @@ static UCS_CLASS_INIT_FUNC(uct_cuda_ipc_ep_t, const uct_ep_params_t *params) static UCS_CLASS_CLEANUP_FUNC(uct_cuda_ipc_ep_t) { if (self->device_ep != NULL) { - (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree((CUdeviceptr)self->device_ep)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree, + (CUdeviceptr)self->device_ep); } } @@ -76,7 +77,7 @@ static UCS_F_ALWAYS_INLINE ucs_status_t uct_cuda_ipc_ctx_rsc_get( result = uct_cuda_ctx_get_id(NULL, &ctx_id); if (ucs_unlikely(result != CUDA_SUCCESS)) { - UCT_CUDADRV_LOG(cuCtxGetId, UCS_LOG_LEVEL_ERROR, result); + UCT_CUDADRV_LOG(UCS_LOG_LEVEL_ERROR, cuCtxGetId, result); return UCS_ERR_IO_ERROR; } @@ -159,15 +160,15 @@ uct_cuda_ipc_post_cuda_async_copy(uct_ep_h tl_ep, uint64_t remote_addr, src = (CUdeviceptr) ((direction == UCT_CUDA_IPC_PUT) ? iov[0].buffer : mapped_rem_addr); - status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemcpyDtoDAsync(dst, src, iov[0].length, - *stream)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemcpyDtoDAsync, dst, src, + iov[0].length, *stream); if (UCS_OK != status) { ucs_mpool_put(cuda_ipc_event); goto out; } - status = UCT_CUDADRV_FUNC_LOG_ERR(cuEventRecord(cuda_ipc_event->super.event, - *stream)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuEventRecord, + cuda_ipc_event->super.event, *stream); if (UCS_OK != status) { ucs_mpool_put(cuda_ipc_event); goto out; @@ -247,14 +248,14 @@ ucs_status_t uct_cuda_ipc_ep_get_device_ep(uct_ep_h tl_ep, } device_ep.uct_tl_id = UCT_DEVICE_TL_CUDA_IPC; - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuMemAlloc((CUdeviceptr *)&ep->device_ep, sizeof(uct_device_ep_t))); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemAlloc, (CUdeviceptr*)&ep->device_ep, + sizeof(uct_device_ep_t)); if (status != UCS_OK) { goto err; } - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuMemcpyHtoD((CUdeviceptr)ep->device_ep, &device_ep, sizeof(uct_device_ep_t))); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemcpyHtoD, (CUdeviceptr)ep->device_ep, + &device_ep, sizeof(uct_device_ep_t)); if (status != UCS_OK) { goto err_free_mem; } @@ -263,7 +264,7 @@ ucs_status_t uct_cuda_ipc_ep_get_device_ep(uct_ep_h tl_ep, *device_ep_p = ep->device_ep; return UCS_OK; err_free_mem: - cuMemFree((CUdeviceptr)ep->device_ep); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree, (CUdeviceptr)ep->device_ep); ep->device_ep = NULL; err: return status; diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_iface.c b/src/uct/cuda/cuda_ipc/cuda_ipc_iface.c index ef41ec5263c..884143fdfa1 100644 --- a/src/uct/cuda/cuda_ipc/cuda_ipc_iface.c +++ b/src/uct/cuda/cuda_ipc/cuda_ipc_iface.c @@ -1,5 +1,5 @@ /** - * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2018-2019. ALL RIGHTS RESERVED. + * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2018-2026. ALL RIGHTS RESERVED. * See file LICENSE for terms. */ @@ -157,15 +157,14 @@ static double uct_cuda_ipc_iface_get_bw() int major_version; ucs_status_t status; - status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet(&cu_device, 0)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet, &cu_device, 0); if (status != UCS_OK) { return 0; } status = UCT_CUDADRV_FUNC_LOG_ERR( - cuDeviceGetAttribute(&major_version, - CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, - cu_device)); + cuDeviceGetAttribute, &major_version, + CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cu_device); if (status != UCS_OK) { return 0; } diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_md.c b/src/uct/cuda/cuda_ipc/cuda_ipc_md.c index 3f6173def7d..662cb3fd8a3 100644 --- a/src/uct/cuda/cuda_ipc/cuda_ipc_md.c +++ b/src/uct/cuda/cuda_ipc/cuda_ipc_md.c @@ -63,7 +63,7 @@ static uct_cuda_ipc_dev_cache_t *uct_cuda_ipc_create_dev_cache(int dev_num) ucs_status_t status; int i, num_devices; - status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetCount(&num_devices)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetCount, &num_devices); if (UCS_OK != status) { return NULL; } @@ -163,15 +163,15 @@ uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh, goto out; } - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuMemGetAddressRange(&key->d_bptr, &key->b_len, (CUdeviceptr)addr)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemGetAddressRange, &key->d_bptr, + &key->b_len, (CUdeviceptr)addr); if (status != UCS_OK) { goto out_pop_ctx; } - status = UCT_CUDADRV_FUNC_LOG_ERR(cuPointerGetAttribute(&key->ph.buffer_id, + status = UCT_CUDADRV_FUNC_LOG_ERR(cuPointerGetAttribute, &key->ph.buffer_id, CU_POINTER_ATTRIBUTE_BUFFER_ID, - (CUdeviceptr)addr)); + (CUdeviceptr)addr); if (status != UCS_OK) { goto out_pop_ctx; } @@ -187,9 +187,9 @@ uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh, attr_type[2] = CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE; attr_data[2] = &mempool; - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuPointerGetAttributes(ucs_static_array_size(attr_data), attr_type, - attr_data, (CUdeviceptr)addr)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuPointerGetAttributes, + ucs_static_array_size(attr_data), + attr_type, attr_data, (CUdeviceptr)addr); if (status != UCS_OK) { goto out_pop_ctx; } @@ -202,15 +202,13 @@ uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh, goto non_ipc; } - status = - UCT_CUDADRV_FUNC(cuMemRetainAllocationHandle(&handle, addr), - UCS_LOG_LEVEL_DIAG); + status = UCT_CUDADRV_FUNC_LOG_DIAG(cuMemRetainAllocationHandle, &handle, + addr); if (status == UCS_OK) { - status = - UCT_CUDADRV_FUNC_LOG_ERR(cuMemExportToShareableHandle( - &key->ph.handle.fabric_handle, handle, - CU_MEM_HANDLE_TYPE_FABRIC, 0)); - UCT_CUDADRV_FUNC_LOG_WARN(cuMemRelease(handle)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemExportToShareableHandle, + &key->ph.handle.fabric_handle, handle, + CU_MEM_HANDLE_TYPE_FABRIC, 0); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemRelease, handle); if (status != UCS_OK) { ucs_debug("unable to export handle for VMM ptr: %p", addr); goto non_ipc; @@ -229,16 +227,16 @@ uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh, goto out_pop_ctx; } - status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemPoolExportToShareableHandle( - (void *)&key->ph.handle.fabric_handle, mempool, - CU_MEM_HANDLE_TYPE_FABRIC, 0)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemPoolExportToShareableHandle, + (void*)&key->ph.handle.fabric_handle, + mempool, CU_MEM_HANDLE_TYPE_FABRIC, 0); if (status != UCS_OK) { ucs_debug("unable to export handle for mempool ptr: %p", addr); goto non_ipc; } - status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemPoolExportPointer(&key->ph.ptr, - (CUdeviceptr)key->d_bptr)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemPoolExportPointer, &key->ph.ptr, + (CUdeviceptr)key->d_bptr); if (status != UCS_OK) { goto out_pop_ctx; } @@ -253,8 +251,8 @@ uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh, #endif legacy_path: key->ph.handle_type = UCT_CUDA_IPC_KEY_HANDLE_TYPE_LEGACY; - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuIpcGetMemHandle(&key->ph.handle.legacy, (CUdeviceptr)addr)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuIpcGetMemHandle, &key->ph.handle.legacy, + (CUdeviceptr)addr); if (status != UCS_OK) { goto out_pop_ctx; } @@ -323,8 +321,8 @@ uct_cuda_ipc_mkey_pack(uct_md_h md, uct_mem_h tl_memh, void *address, packed->pid |= UCT_CUDA_IPC_RKEY_FLAG_PID_NS; } - return UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetUuid(&packed->uuid, - memh->dev_num)); + return UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetUuid, &packed->uuid, + memh->dev_num); } static ucs_status_t @@ -570,7 +568,7 @@ uct_cuda_ipc_md_mem_elem_pack(uct_md_h md, uct_mem_h memh, uct_rkey_t rkey, CUdevice cuda_device; void *mapped_addr; - if (UCT_CUDADRV_FUNC_LOG_DEBUG(cuCtxGetDevice(&cuda_device)) != UCS_OK) { + if (UCT_CUDADRV_FUNC_LOG_DEBUG(cuCtxGetDevice, &cuda_device) != UCS_OK) { return UCS_ERR_UNREACHABLE; } @@ -632,7 +630,7 @@ ucs_status_t uct_cuda_ipc_rkey_ptr(uct_component_t *component, uct_rkey_t rkey, CUdevice cu_dev; ucs_status_t status; - status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxGetDevice(&cu_dev)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxGetDevice, &cu_dev); if (ucs_unlikely(status != UCS_OK)) { return status; } diff --git a/src/uct/ib/mlx5/gdaki/gdaki.c b/src/uct/ib/mlx5/gdaki/gdaki.c index c243327b682..e4320fa9f11 100644 --- a/src/uct/ib/mlx5/gdaki/gdaki.c +++ b/src/uct/ib/mlx5/gdaki/gdaki.c @@ -87,15 +87,15 @@ uct_rc_gdaki_alloc(size_t size, size_t align, void **p_buf, CUdeviceptr *p_orig) unsigned int flag = 1; ucs_status_t status; - status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemAlloc(p_orig, size + align - 1)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemAlloc, p_orig, size + align - 1); if (status != UCS_OK) { return status; } *p_buf = (void*)ucs_align_up_pow2_ptr(*p_orig, align); - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuPointerSetAttribute(&flag, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, - (CUdeviceptr)*p_buf)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuPointerSetAttribute, &flag, + CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, + (CUdeviceptr)*p_buf); if (status != UCS_OK) { goto err; } @@ -103,7 +103,7 @@ uct_rc_gdaki_alloc(size_t size, size_t align, void **p_buf, CUdeviceptr *p_orig) return UCS_OK; err: - cuMemFree(*p_orig); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree, *p_orig); return status; } @@ -134,7 +134,7 @@ static int uct_gdaki_check_umem_dmabuf(const uct_ib_md_t *md) uct_cuda_copy_md_dmabuf_t dmabuf; CUdeviceptr buff; - if (UCT_CUDADRV_FUNC_LOG_ERR(cuMemAlloc(&buff, 1)) != UCS_OK) { + if (UCT_CUDADRV_FUNC_LOG_ERR(cuMemAlloc, &buff, 1) != UCS_OK) { goto out; } @@ -162,7 +162,7 @@ static int uct_gdaki_check_umem_dmabuf(const uct_ib_md_t *md) out_close: ucs_close_fd(&dmabuf.fd); out_free: - (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree(buff)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree, buff); out: #endif return ret; @@ -245,7 +245,7 @@ uct_rc_gdaki_init_umem(uct_rc_gdaki_iface_t *iface, uint64_t pgsz_bitmap, uct_ib_md_t); ucs_status_t status; - status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent(iface->cuda_ctx)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent, iface->cuda_ctx); if (status != UCS_OK) { return status; } @@ -266,15 +266,15 @@ uct_rc_gdaki_init_umem(uct_rc_gdaki_iface_t *iface, uint64_t pgsz_bitmap, goto err_umem; } - (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL); return UCS_OK; err_umem: - cuMemFree(mem->gpu_raw); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree, mem->gpu_raw); mem->gpu_mem = NULL; mem->gpu_raw = 0; out_ctx: - (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL); return status; } @@ -441,7 +441,7 @@ uct_rc_gdaki_pool_chunk_alloc(ucs_mpool_t *mp, size_t *size_p, void **chunk_p) err_umem: mlx5dv_devx_umem_dereg(hdr->umem); - cuMemFree(hdr->gpu_raw); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree, hdr->gpu_raw); err_free_hdr: ucs_free(hdr); return status; @@ -461,7 +461,7 @@ static void uct_rc_gdaki_pool_chunk_release(ucs_mpool_t *mp, void *chunk) mp_chunk->num_elems, iface->num_channels - 1); mlx5dv_devx_umem_dereg(hdr->umem); - cuMemFree(hdr->gpu_raw); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree, hdr->gpu_raw); ucs_free(hdr); } @@ -591,7 +591,7 @@ uct_rc_gdaki_ep_init_channels_direct(uct_rc_gdaki_iface_t *iface, err_umem: mlx5dv_devx_umem_dereg(ep->mem.umem); - cuMemFree(ep->mem.gpu_raw); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree, ep->mem.gpu_raw); err_block: ucs_free(ep->channel_block); uct_rc_gdaki_ep_reset_channels(ep); @@ -608,7 +608,7 @@ static void uct_rc_gdaki_cleanup_channels_direct(uct_rc_gdaki_iface_t *iface, uct_rc_gdaki_chunk_channels_destroy(iface, NULL, ep->channel_block, 1, 1, iface->num_channels - 1); mlx5dv_devx_umem_dereg(ep->mem.umem); - cuMemFree(ep->mem.gpu_raw); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree, ep->mem.gpu_raw); ucs_free(ep->channel_block); uct_rc_gdaki_ep_reset_channels(ep); } @@ -656,7 +656,8 @@ static UCS_CLASS_CLEANUP_FUNC(uct_rc_gdaki_ep_t) if (self->dev_ep_init) { uct_rc_gdaki_channel_t *channels = self->channel_block->channels; for (i = 0; i < iface->num_channels; i++) { - (void)cuMemHostUnregister(channels[i].qp.reg->addr.ptr); + (void)UCS_PROFILE_CALL_ALWAYS(cuMemHostUnregister, + channels[i].qp.reg->addr.ptr); } } uct_rc_gdaki_ep_cleanup_channels(iface, self); @@ -832,7 +833,7 @@ uct_rc_gdaki_ep_get_device_ep(uct_ep_h tl_ep, uct_device_ep_h *device_ep_p) pthread_mutex_lock(&iface->ep_init_lock); if (!ep->dev_ep_init) { - status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent(iface->cuda_ctx)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent, iface->cuda_ctx); if (status != UCS_OK) { goto out_unlock; } @@ -849,8 +850,8 @@ uct_rc_gdaki_ep_get_device_ep(uct_ep_h tl_ep, uct_device_ep_h *device_ep_p) } status = UCT_CUDADRV_FUNC_LOG_ERR( - cuMemsetD8((CUdeviceptr)ep->channel_block->gpu_ptr, 0, - dev_ep_host_size)); + cuMemsetD8, (CUdeviceptr)ep->channel_block->gpu_ptr, 0, + dev_ep_host_size); if (status != UCS_OK) { goto out_free; } @@ -868,15 +869,15 @@ uct_rc_gdaki_ep_get_device_ep(uct_ep_h tl_ep, uct_device_ep_h *device_ep_p) for (i = 0; i < iface->num_channels; ++i) { channel = &ep->channel_block->channels[i]; - (void)cuMemHostRegister(channel->qp.reg->addr.ptr, - UCT_IB_MLX5_BF_REG_SIZE * 2, - CU_MEMHOSTREGISTER_PORTABLE | - CU_MEMHOSTREGISTER_DEVICEMAP | - CU_MEMHOSTREGISTER_IOMEMORY); - - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuMemHostGetDevicePointer(&sq_db, channel->qp.reg->addr.ptr, - 0)); + (void)UCS_PROFILE_CALL_ALWAYS(cuMemHostRegister, + channel->qp.reg->addr.ptr, + UCT_IB_MLX5_BF_REG_SIZE * 2, + CU_MEMHOSTREGISTER_PORTABLE | + CU_MEMHOSTREGISTER_DEVICEMAP | + CU_MEMHOSTREGISTER_IOMEMORY); + + status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemHostGetDevicePointer, &sq_db, + channel->qp.reg->addr.ptr, 0); if (status != UCS_OK) { goto out_unreg; } @@ -889,14 +890,14 @@ uct_rc_gdaki_ep_get_device_ep(uct_ep_h tl_ep, uct_device_ep_h *device_ep_p) } status = UCT_CUDADRV_FUNC_LOG_ERR( - cuMemcpyHtoD((CUdeviceptr)ep->channel_block->gpu_ptr, dev_ep, - dev_ep_host_size)); + cuMemcpyHtoD, (CUdeviceptr)ep->channel_block->gpu_ptr, dev_ep, + dev_ep_host_size); if (status != UCS_OK) { goto out_unreg; } ucs_free(dev_ep); - (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL); ep->dev_ep_init = 1; } @@ -907,13 +908,14 @@ uct_rc_gdaki_ep_get_device_ep(uct_ep_h tl_ep, uct_device_ep_h *device_ep_p) out_unreg: do { - (void)cuMemHostUnregister( + (void)UCS_PROFILE_CALL_ALWAYS( + cuMemHostUnregister, ep->channel_block->channels[i].qp.reg->addr.ptr); } while (i-- > 0); out_free: ucs_free(dev_ep); out_ctx: - (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL); out_unlock: pthread_mutex_unlock(&iface->ep_init_lock); return status; @@ -1024,24 +1026,24 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_iface_t, uct_md_h tl_md, } cuda_id = atoi(gpu_name + UCT_DEVICE_CUDA_NAME_LEN); - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuDeviceGetPCIBusId(pci_addr, UCS_SYS_BDF_NAME_MAX, cuda_id)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetPCIBusId, pci_addr, + UCS_SYS_BDF_NAME_MAX, cuda_id); if (status != UCS_OK) { return status; } - status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet(&self->cuda_dev, cuda_id)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet, &self->cuda_dev, cuda_id); if (status != UCS_OK) { return status; } - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuDevicePrimaryCtxRetain(&self->cuda_ctx, self->cuda_dev)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuDevicePrimaryCtxRetain, &self->cuda_ctx, + self->cuda_dev); if (status != UCS_OK) { return status; } - status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent(self->cuda_ctx)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent, self->cuda_ctx); if (status != UCS_OK) { goto err_ctx_release; } @@ -1074,7 +1076,7 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_iface_t, uct_md_h tl_md, } } - (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL); return UCS_OK; err_pool: @@ -1084,9 +1086,9 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_iface_t, uct_md_h tl_md, err_atomic_buff: ucs_free(self->atomic_buff); err_ctx: - (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL); err_ctx_release: - (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(self->cuda_dev)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, self->cuda_dev); return status; } @@ -1095,12 +1097,12 @@ static UCS_CLASS_CLEANUP_FUNC(uct_rc_gdaki_iface_t) pthread_mutex_destroy(&self->ep_init_lock); ibv_dereg_mr(self->atomic_mr); ucs_free(self->atomic_buff); - (void)UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent(self->cuda_ctx)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPushCurrent, self->cuda_ctx); if (self->ep_alloc_mode == UCT_RC_GDAKI_EP_ALLOC_MODE_POOL) { uct_rc_gdaki_iface_cleanup_channel_pool(self); } - (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL)); - (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(self->cuda_dev)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, self->cuda_dev); } UCS_CLASS_DEFINE(uct_rc_gdaki_iface_t, uct_rc_mlx5_iface_common_t); @@ -1124,10 +1126,10 @@ static ucs_status_t uct_gdaki_md_check_uar(uct_ib_mlx5_md_t *md) flags = CU_MEMHOSTREGISTER_PORTABLE | CU_MEMHOSTREGISTER_DEVICEMAP | CU_MEMHOSTREGISTER_IOMEMORY; - status = UCT_CUDADRV_FUNC_LOG_DEBUG( - cuMemHostRegister(uar->reg_addr, UCT_IB_MLX5_BF_REG_SIZE, flags)); + status = UCT_CUDADRV_FUNC_LOG_DEBUG(cuMemHostRegister, uar->reg_addr, + UCT_IB_MLX5_BF_REG_SIZE, flags); if (status == UCS_OK) { - UCT_CUDADRV_FUNC_LOG_WARN(cuMemHostUnregister(uar->reg_addr)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemHostUnregister, uar->reg_addr); } mlx5dv_devx_free_uar(uar); @@ -1270,7 +1272,7 @@ uct_gdaki_dev_matrix_init(const uct_ib_md_t *ib_md, size_t *dmat_length_p) } /* Get the number of CUDA devices */ - status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetCount(&cudadev_count)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetCount, &cudadev_count); if (status != UCS_OK) { goto out; } @@ -1287,8 +1289,8 @@ uct_gdaki_dev_matrix_init(const uct_ib_md_t *ib_md, size_t *dmat_length_p) /* Map each CUDA device to the best suited IB devices */ for (cudadev_index = 0; cudadev_index < cudadev_count; cudadev_index++) { - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuDeviceGet(&cuda_dev, cudadev_index)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet, &cuda_dev, + cudadev_index); if (status != UCS_OK) { goto out; } @@ -1351,7 +1353,7 @@ static CUdevice uct_gdaki_push_primary_ctx(int retain_inactive_ctx) return CU_DEVICE_INVALID; } - status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet(&cuda_dev, 0)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet, &cuda_dev, 0); if (status != UCS_OK) { return CU_DEVICE_INVALID; } @@ -1461,7 +1463,7 @@ uct_gdaki_query_tl_devices(uct_md_h tl_md, num_tl_devices = 0; ucs_for_each_bit(i, ibdesc->cuda_map) { - status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet(&device, i)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet, &device, i); if (status != UCS_OK) { goto err; } diff --git a/test/gtest/uct/test_device.cc b/test/gtest/uct/test_device.cc index 5dd8f7d650a..c2c770f34af 100644 --- a/test/gtest/uct/test_device.cc +++ b/test/gtest/uct/test_device.cc @@ -40,9 +40,9 @@ class test_device_cuda_ctx_guard { test_device_cuda_ctx_guard() : m_dev(0), m_ctx(NULL), m_is_active(false) { - (void)UCT_CUDADRV_FUNC_LOG_DEBUG(cuInit(0)); + (void)UCT_CUDADRV_FUNC_LOG_DEBUG(cuInit, 0); - if (UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet(&m_dev, 0)) != UCS_OK) { + if (UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet, &m_dev, 0) != UCS_OK) { return; } @@ -65,13 +65,13 @@ class test_device_cuda_ctx_guard { return; } - if (UCT_CUDADRV_FUNC_LOG_ERR(cuDevicePrimaryCtxRetain(&m_ctx, m_dev)) != + if (UCT_CUDADRV_FUNC_LOG_ERR(cuDevicePrimaryCtxRetain, &m_ctx, m_dev) != UCS_OK) { return; } - if (UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent(m_ctx)) != UCS_OK) { - (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(m_dev)); + if (UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent, m_ctx) != UCS_OK) { + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, m_dev); return; } @@ -84,8 +84,8 @@ class test_device_cuda_ctx_guard { return; } - (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL)); - (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(m_dev)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, m_dev); m_is_active = false; } }; @@ -115,11 +115,11 @@ class test_device : public uct_test { return; } - status = UCT_CUDADRV_FUNC_LOG_ERR( - cuDevicePrimaryCtxRetain(&ctx, m_cuda_dev)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuDevicePrimaryCtxRetain, &ctx, + m_cuda_dev); ASSERT_UCS_OK(status); - status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent(ctx)); + status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent, ctx); ASSERT_UCS_OK(status); } @@ -131,8 +131,8 @@ class test_device : public uct_test { return; } - (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL)); - (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(m_cuda_dev)); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL); + (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, m_cuda_dev); } entity *m_sender;