diff --git a/src/uct/cuda/base/cuda_ctx.c b/src/uct/cuda/base/cuda_ctx.c
index 1a763a31401..a4125a14887 100644
--- a/src/uct/cuda/base/cuda_ctx.c
+++ b/src/uct/cuda/base/cuda_ctx.c
@@ -20,8 +20,8 @@ ucs_status_t uct_cuda_ctx_primary_retain(CUdevice cuda_device, int force,
     CUcontext cuda_ctx;
 
     if (!force) {
-        status = UCT_CUDADRV_FUNC_LOG_ERR(
-                    cuDevicePrimaryCtxGetState(cuda_device, &flags, &active));
+        status = UCT_CUDADRV_FUNC_LOG_ERR(cuDevicePrimaryCtxGetState,
+                                          cuda_device, &flags, &active);
         if (status != UCS_OK) {
             return status;
         }
@@ -33,8 +33,8 @@ ucs_status_t uct_cuda_ctx_primary_retain(CUdevice cuda_device, int force,
         }
     }
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(
-                cuDevicePrimaryCtxRetain(&cuda_ctx, cuda_device));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuDevicePrimaryCtxRetain, &cuda_ctx,
+                                      cuda_device);
     if (status != UCS_OK) {
         return status;
     }
@@ -49,14 +49,14 @@ ucs_status_t uct_cuda_ctx_primary_push_first_active(CUdevice *cuda_device_p)
     ucs_status_t status;
     CUdevice cuda_device;
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetCount(&num_devices));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetCount, &num_devices);
     if (status != UCS_OK) {
         return status;
     }
 
     for (device_index = 0; device_index < num_devices; ++device_index) {
-        status = UCT_CUDADRV_FUNC_LOG_ERR(
-                    cuDeviceGet(&cuda_device, device_index));
+        status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet, &cuda_device,
+                                          device_index);
         if (status != UCS_OK) {
             return status;
         }
@@ -85,9 +85,9 @@ ucs_status_t uct_cuda_ctx_primary_push(CUdevice cuda_device, int retain_inactive
         return status;
     }
 
-    status = UCT_CUDADRV_FUNC(cuCtxPushCurrent(primary_ctx), log_level);
+    status = UCT_CUDADRV_FUNC(log_level, cuCtxPushCurrent, primary_ctx);
     if (status != UCS_OK) {
-        (void)UCT_CUDADRV_FUNC(cuDevicePrimaryCtxRelease(cuda_device), log_level);
+        (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, cuda_device);
     }
 
     return status;
@@ -102,7 +102,7 @@ ucs_status_t uct_cuda_ctx_primary_push_avail(int retain_inactive,
     ucs_status_t status;
     int dev_ordinal, num_devices;
 
-    status = UCT_CUDADRV_FUNC_LOG_DEBUG(cuCtxGetDevice(cuda_device_p));
+    status = UCT_CUDADRV_FUNC_LOG_DEBUG(cuCtxGetDevice, cuda_device_p);
     if (status != UCS_OK) {
         *cuda_device_p = CU_DEVICE_INVALID;
     }
@@ -137,15 +137,15 @@ ucs_status_t uct_cuda_ctx_primary_push_avail(int retain_inactive,
         return UCS_OK;
     }
 
-    status = UCT_CUDADRV_FUNC(cuDeviceGetCount(&num_devices), UCS_LOG_LEVEL_DIAG);
+    status = UCT_CUDADRV_FUNC_LOG_DIAG(cuDeviceGetCount, &num_devices);
     if (status != UCS_OK) {
         return UCS_ERR_INVALID_PARAM;
     }
 
     /* Use the first active cuda device for allocation */
     for (dev_ordinal = 0; dev_ordinal < num_devices; dev_ordinal++) {
-        if (UCT_CUDADRV_FUNC_LOG_DEBUG(cuDeviceGet(avail_cuda_device_p,
-                                                   dev_ordinal)) != UCS_OK) {
+        if (UCT_CUDADRV_FUNC_LOG_DEBUG(cuDeviceGet, avail_cuda_device_p,
+                                       dev_ordinal) != UCS_OK) {
             continue;
         }
 
@@ -162,6 +162,6 @@ ucs_status_t uct_cuda_ctx_primary_push_avail(int retain_inactive,
 
 void uct_cuda_ctx_primary_pop_and_release(CUdevice cuda_device)
 {
-    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL));
-    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(cuda_device));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL);
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, cuda_device);
 }
diff --git a/src/uct/cuda/base/cuda_ctx.inl b/src/uct/cuda/base/cuda_ctx.inl
index 694c2580e1e..c3995fc105d 100644
--- a/src/uct/cuda/base/cuda_ctx.inl
+++ b/src/uct/cuda/base/cuda_ctx.inl
@@ -38,12 +38,12 @@ uct_cuda_ctx_pop_and_release(CUdevice cuda_device, CUcontext cuda_context)
         return;
     }
 
-    UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL);
     if (cuda_device == CU_DEVICE_INVALID) {
         return;
     }
 
-    UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(cuda_device));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, cuda_device);
 }
 
 #endif
diff --git a/src/uct/cuda/base/cuda_iface.c b/src/uct/cuda/base/cuda_iface.c
index 6586efbe536..653078f3702 100644
--- a/src/uct/cuda/base/cuda_iface.c
+++ b/src/uct/cuda/base/cuda_iface.c
@@ -1,5 +1,5 @@
 /**
- * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2019. ALL RIGHTS RESERVED.
+ * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2019-2026. ALL RIGHTS RESERVED.
  *
  * See file LICENSE for terms.
  */
@@ -28,7 +28,7 @@ uct_cuda_base_query_devices_common(
     ucs_status_t status;
 
     if (uct_cuda_ctx_is_active()) {
-        status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxGetDevice(&cuda_device));
+        status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxGetDevice, &cuda_device);
         if (status != UCS_OK) {
             return status;
         }
@@ -141,15 +141,13 @@ ucs_status_t uct_cuda_base_iface_event_fd_arm(uct_iface_h tl_iface,
         if (!ucs_queue_is_empty(event_q)) {
             status =
 #if (__CUDACC_VER_MAJOR__ >= 100000)
-                UCT_CUDADRV_FUNC_LOG_ERR(
-                        cuLaunchHostFunc(*stream,
-                                         uct_cuda_base_iface_stream_cb_fxn,
-                                         iface));
+                    UCT_CUDADRV_FUNC_LOG_ERR(cuLaunchHostFunc, *stream,
+                                             uct_cuda_base_iface_stream_cb_fxn,
+                                             iface);
 #else
-                UCT_CUDADRV_FUNC_LOG_ERR(
-                        cuStreamAddCallback(*stream,
-                                            uct_cuda_base_iface_stream_cb_fxn,
-                                            iface, 0));
+                    UCT_CUDADRV_FUNC_LOG_ERR(cuStreamAddCallback, *stream,
+                                             uct_cuda_base_iface_stream_cb_fxn,
+                                             iface, 0);
 #endif
             if (UCS_OK != status) {
                 return status;
@@ -318,7 +316,7 @@ ucs_status_t uct_cuda_base_iface_flush(uct_iface_h tl_iface, unsigned flags,
 void uct_cuda_base_stream_destroy(CUstream *stream)
 {
     if (*stream != NULL) {
-        (void)UCT_CUDADRV_FUNC_LOG_WARN(cuStreamDestroy(*stream));
+        (void)UCT_CUDADRV_FUNC_LOG_WARN(cuStreamDestroy, *stream);
     }
 }
 
@@ -327,15 +325,15 @@ uct_cuda_base_event_desc_init(ucs_mpool_t *mp, void *obj, void *chunk)
 {
     uct_cuda_event_desc_t *event_desc = obj;
 
-    UCT_CUDADRV_FUNC_LOG_ERR(cuEventCreate(&event_desc->event,
-                                           CU_EVENT_DISABLE_TIMING));
+    (void)UCT_CUDADRV_FUNC_LOG_ERR(cuEventCreate, &event_desc->event,
+                                   CU_EVENT_DISABLE_TIMING);
 }
 
 static void uct_cuda_base_event_desc_cleanup(ucs_mpool_t *mp, void *obj)
 {
     uct_cuda_event_desc_t *event_desc = obj;
 
-    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuEventDestroy(event_desc->event));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuEventDestroy, event_desc->event);
 }
 
 void uct_cuda_base_queue_desc_init(uct_cuda_queue_desc_t *qdesc)
@@ -375,7 +373,7 @@ ucs_status_t uct_cuda_base_ctx_rsc_create(uct_cuda_iface_t *iface,
     uct_cuda_ctx_rsc_t *ctx_rsc;
     ucs_mpool_params_t mp_params;
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxGetCurrent(&ctx));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxGetCurrent, &ctx);
     if (status != UCS_OK) {
         return status;
     } else if (ctx == NULL) {
diff --git a/src/uct/cuda/base/cuda_iface.h b/src/uct/cuda/base/cuda_iface.h
index b17ddfc55f6..214835c59ca 100644
--- a/src/uct/cuda/base/cuda_iface.h
+++ b/src/uct/cuda/base/cuda_iface.h
@@ -1,5 +1,5 @@
 /**
- * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2018. ALL RIGHTS RESERVED.
+ * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2018-2026. ALL RIGHTS RESERVED.
  * See file LICENSE for terms.
  */
 
@@ -166,8 +166,8 @@ uct_cuda_base_init_stream(CUstream *stream)
         return UCS_OK;
     }
 
-    return UCT_CUDADRV_FUNC_LOG_ERR(
-            cuStreamCreate(stream, CU_STREAM_NON_BLOCKING));
+    return UCT_CUDADRV_FUNC_LOG_ERR(cuStreamCreate, stream,
+                                    CU_STREAM_NON_BLOCKING);
 }
 
 #endif
diff --git a/src/uct/cuda/base/cuda_md.c b/src/uct/cuda/base/cuda_md.c
index c0671ac45a1..2aca70b45f5 100644
--- a/src/uct/cuda/base/cuda_md.c
+++ b/src/uct/cuda/base/cuda_md.c
@@ -1,5 +1,5 @@
 /**
- * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2018-2019. ALL RIGHTS RESERVED.
+ * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2018-2026. ALL RIGHTS RESERVED.
  * See file LICENSE for terms.
  */
 
@@ -27,14 +27,13 @@ uct_cuda_base_query_md_resources(uct_component_t *component,
     char device_name[10];
     int i, num_gpus;
 
-    status = UCT_CUDADRV_FUNC(cuDeviceGetCount(&num_gpus), UCS_LOG_LEVEL_DIAG);
+    status = UCT_CUDADRV_FUNC_LOG_DIAG(cuDeviceGetCount, &num_gpus);
     if ((status != UCS_OK) || (num_gpus == 0)) {
         return uct_md_query_empty_md_resource(resources_p, num_resources_p);
     }
 
     for (i = 0; i < num_gpus; ++i) {
-        status = UCT_CUDADRV_FUNC(cuDeviceGet(&cuda_device, i),
-                                  UCS_LOG_LEVEL_DIAG);
+        status = UCT_CUDADRV_FUNC_LOG_DIAG(cuDeviceGet, &cuda_device, i);
         if (status != UCS_OK) {
             continue;
         }
@@ -57,7 +56,7 @@ uct_cuda_base_query_md_resources(uct_component_t *component,
 
 UCS_STATIC_INIT
 {
-    UCT_CUDADRV_FUNC_LOG_DEBUG(cuInit(0));
+    (void)UCT_CUDADRV_FUNC_LOG_DEBUG(cuInit, 0);
 }
 
 UCS_STATIC_CLEANUP
diff --git a/src/uct/cuda/base/cuda_util.h b/src/uct/cuda/base/cuda_util.h
index 804c2ae1d70..9300b890eb2 100644
--- a/src/uct/cuda/base/cuda_util.h
+++ b/src/uct/cuda/base/cuda_util.h
@@ -7,8 +7,9 @@
 #ifndef UCT_CUDA_UTIL_H
 #define UCT_CUDA_UTIL_H
 
-#include <ucs/sys/topo/base/topo.h>
 #include <ucs/debug/log.h>
+#include <ucs/profile/profile_defs.h>
+#include <ucs/sys/topo/base/topo.h>
 
 #include <cuda.h>
 
@@ -16,35 +17,39 @@
 const char *uct_cuda_cu_get_error_string(CUresult result);
 
 
-#define UCT_CUDADRV_LOG(_func, _log_level, _result) \
+#define UCT_CUDADRV_LOG(_log_level, _func, _result) \
     ucs_log((_log_level), "%s failed: %s", UCS_PP_MAKE_STRING(_func), \
             uct_cuda_cu_get_error_string(_result))
 
 
-#define UCT_CUDADRV_FUNC(_func, _log_level) \
+#define UCT_CUDADRV_FUNC(_log_level, _func, ...) \
     ({ \
-        CUresult _result = (_func); \
+        CUresult _result = UCS_PROFILE_CALL_ALWAYS(_func, ##__VA_ARGS__); \
         ucs_status_t _status; \
         if (ucs_likely(_result == CUDA_SUCCESS)) { \
             _status = UCS_OK; \
         } else { \
-            UCT_CUDADRV_LOG(_func, _log_level, _result); \
+            UCT_CUDADRV_LOG(_log_level, _func, _result); \
             _status = UCS_ERR_IO_ERROR; \
         } \
         _status; \
     })
 
 
-#define UCT_CUDADRV_FUNC_LOG_ERR(_func) \
-    UCT_CUDADRV_FUNC(_func, UCS_LOG_LEVEL_ERROR)
+#define UCT_CUDADRV_FUNC_LOG_ERR(_func, ...) \
+    UCT_CUDADRV_FUNC(UCS_LOG_LEVEL_ERROR, _func, ##__VA_ARGS__)
+
+
+#define UCT_CUDADRV_FUNC_LOG_WARN(_func, ...) \
+    UCT_CUDADRV_FUNC(UCS_LOG_LEVEL_WARN, _func, ##__VA_ARGS__)
 
 
-#define UCT_CUDADRV_FUNC_LOG_WARN(_func) \
-    UCT_CUDADRV_FUNC(_func, UCS_LOG_LEVEL_WARN)
+#define UCT_CUDADRV_FUNC_LOG_DIAG(_func, ...) \
+    UCT_CUDADRV_FUNC(UCS_LOG_LEVEL_DIAG, _func, ##__VA_ARGS__)
 
 
-#define UCT_CUDADRV_FUNC_LOG_DEBUG(_func) \
-    UCT_CUDADRV_FUNC(_func, UCS_LOG_LEVEL_DEBUG)
+#define UCT_CUDADRV_FUNC_LOG_DEBUG(_func, ...) \
+    UCT_CUDADRV_FUNC(UCS_LOG_LEVEL_DEBUG, _func, ##__VA_ARGS__)
 
 
 /**
diff --git a/src/uct/cuda/cuda_copy/cuda_copy_ep.c b/src/uct/cuda/cuda_copy/cuda_copy_ep.c
index e7545ef7b69..8599eef1285 100644
--- a/src/uct/cuda/cuda_copy/cuda_copy_ep.c
+++ b/src/uct/cuda/cuda_copy/cuda_copy_ep.c
@@ -1,5 +1,5 @@
 /**
- * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2017-2019. ALL RIGHTS RESERVED.
+ * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2017-2026. ALL RIGHTS RESERVED.
  * See file LICENSE for terms.
  */
 
@@ -142,9 +142,9 @@ static ucs_status_t uct_cuda_copy_ep_push_memory_ctx(CUdeviceptr cuda_deviceptr,
     CUcontext cuda_context;
     ucs_status_t status;
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(
-            cuPointerGetAttribute(&cuda_context, CU_POINTER_ATTRIBUTE_CONTEXT,
-                                  cuda_deviceptr));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuPointerGetAttribute, &cuda_context,
+                                      CU_POINTER_ATTRIBUTE_CONTEXT,
+                                      cuda_deviceptr);
     if (status != UCS_OK) {
         return status;
     }
@@ -154,7 +154,7 @@ static ucs_status_t uct_cuda_copy_ep_push_memory_ctx(CUdeviceptr cuda_deviceptr,
         return UCS_ERR_UNSUPPORTED;
     }
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent(cuda_context));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent, cuda_context);
     if (status != UCS_OK) {
         return status;
     }
@@ -230,7 +230,7 @@ static UCS_F_ALWAYS_INLINE ucs_status_t uct_cuda_copy_ctx_rsc_get(
 
         result = uct_cuda_ctx_get_id(NULL, &ctx_id);
         if (result != CUDA_SUCCESS) {
-            UCT_CUDADRV_LOG(cuCtxGetId, UCS_LOG_LEVEL_ERROR, result);
+            UCT_CUDADRV_LOG(UCS_LOG_LEVEL_ERROR, cuCtxGetId, result);
             status = UCS_ERR_IO_ERROR;
             goto err_pop_and_release;
         }
@@ -323,14 +323,14 @@ uct_cuda_copy_post_cuda_async_copy(uct_ep_h tl_ep, void *dst, void *src,
         goto out_pop_and_release;
     }
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(
-            cuMemcpyAsync((CUdeviceptr)dst, (CUdeviceptr)src, length, *stream));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemcpyAsync, (CUdeviceptr)dst,
+                                      (CUdeviceptr)src, length, *stream);
     if (ucs_unlikely(UCS_OK != status)) {
         goto err_mpool_put;
     }
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(
-            cuEventRecord(cuda_event->event, *stream));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuEventRecord, cuda_event->event,
+                                      *stream);
     if (ucs_unlikely(UCS_OK != status)) {
         goto err_mpool_put;
     }
@@ -422,12 +422,12 @@ static UCS_F_ALWAYS_INLINE ucs_status_t uct_cuda_copy_ep_rma_short(
         goto out_pop_and_release;
     }
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemcpyAsync(dst, src, length, *stream));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemcpyAsync, dst, src, length, *stream);
     if (ucs_unlikely(status != UCS_OK)) {
         goto out_pop_and_release;
     }
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuStreamSynchronize(*stream));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuStreamSynchronize, *stream);
 
 out_pop_and_release:
     uct_cuda_ctx_pop_and_release(ctx.cuda_device, ctx.cuda_context);
diff --git a/src/uct/cuda/cuda_copy/cuda_copy_md.c b/src/uct/cuda/cuda_copy/cuda_copy_md.c
index 8735bc06715..ecde59ab966 100644
--- a/src/uct/cuda/cuda_copy/cuda_copy_md.c
+++ b/src/uct/cuda/cuda_copy/cuda_copy_md.c
@@ -1,5 +1,5 @@
 /**
- * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2017-2019. ALL RIGHTS RESERVED.
+ * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2017-2026. ALL RIGHTS RESERVED.
  * See file LICENSE for terms.
  */
 
@@ -93,16 +93,15 @@ int uct_cuda_copy_md_is_dmabuf_supported()
     int dmabuf_supported = 0;
     CUdevice cuda_device;
 
-    if (UCT_CUDADRV_FUNC_LOG_DEBUG(cuDeviceGet(&cuda_device, 0)) != UCS_OK) {
+    if (UCT_CUDADRV_FUNC_LOG_DEBUG(cuDeviceGet, &cuda_device, 0) != UCS_OK) {
         return 0;
     }
 
     /* Assume dmabuf support is uniform across all devices */
 #if CUDA_VERSION >= 11070
-    if (UCT_CUDADRV_FUNC_LOG_DEBUG(
-                cuDeviceGetAttribute(&dmabuf_supported,
-                                     CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED,
-                                     cuda_device)) != UCS_OK) {
+    if (UCT_CUDADRV_FUNC_LOG_DEBUG(cuDeviceGetAttribute, &dmabuf_supported,
+                                   CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED,
+                                   cuda_device) != UCS_OK) {
         return 0;
     }
 #endif
@@ -165,9 +164,8 @@ UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_copy_mem_reg,
 
     log_level = (flags & UCT_MD_MEM_FLAG_HIDE_ERRORS) ? UCS_LOG_LEVEL_DEBUG :
                 UCS_LOG_LEVEL_ERROR;
-    status    = UCT_CUDADRV_FUNC(cuMemHostRegister(address, length,
-                                                   CU_MEMHOSTREGISTER_PORTABLE),
-                                 log_level);
+    status    = UCT_CUDADRV_FUNC(log_level, cuMemHostRegister, address, length,
+                                 CU_MEMHOSTREGISTER_PORTABLE);
     if (status != UCS_OK) {
         return status;
     }
@@ -183,8 +181,7 @@ UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_copy_mem_dereg,
     UCT_MD_MEM_DEREG_CHECK_PARAMS(params, 0);
 
     if (params->memh != &uct_cuda_dummy_memh) {
-        UCT_CUDADRV_FUNC(cuMemHostUnregister((void*)params->memh),
-                         UCS_LOG_LEVEL_DIAG);
+        UCT_CUDADRV_FUNC_LOG_DIAG(cuMemHostUnregister, (void*)params->memh);
     }
 
     return UCS_OK;
@@ -217,9 +214,9 @@ uct_cuda_copy_mem_alloc_fabric(uct_cuda_copy_md_t *md,
     prop.allocFlags.gpuDirectRDMACapable = 1;
 
     if (md->granularity == SIZE_MAX) {
-        status = UCT_CUDADRV_FUNC(cuMemGetAllocationGranularity(
-                &md->granularity, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM),
-                log_level);
+        status = UCT_CUDADRV_FUNC(log_level, cuMemGetAllocationGranularity,
+                                  &md->granularity, &prop,
+                                  CU_MEM_ALLOC_GRANULARITY_MINIMUM);
         if (status != UCS_OK) {
             return status;
         }
@@ -227,24 +224,23 @@ uct_cuda_copy_mem_alloc_fabric(uct_cuda_copy_md_t *md,
 
     alloc_handle->length = ucs_align_up(alloc_handle->length, md->granularity);
 
-    status = UCT_CUDADRV_FUNC(cuMemCreate(&alloc_handle->generic_handle,
-                                          alloc_handle->length, &prop, 0),
-                              log_level);
+    status = UCT_CUDADRV_FUNC(log_level, cuMemCreate,
+                              &alloc_handle->generic_handle,
+                              alloc_handle->length, &prop, 0);
     if (status != UCS_OK) {
         return UCS_ERR_NO_MEMORY;
     }
 
-    status = UCT_CUDADRV_FUNC(cuMemAddressReserve(
-                                     &alloc_handle->ptr, alloc_handle->length,
-                                     md->granularity, 0, 0),
-                              log_level);
+    status = UCT_CUDADRV_FUNC(log_level, cuMemAddressReserve,
+                              &alloc_handle->ptr, alloc_handle->length,
+                              md->granularity, 0, 0);
     if (status != UCS_OK) {
         goto err_mem_release;
     }
 
-    status = UCT_CUDADRV_FUNC(cuMemMap(alloc_handle->ptr, alloc_handle->length,
-                                       0, alloc_handle->generic_handle, 0),
-                              log_level);
+    status = UCT_CUDADRV_FUNC(log_level, cuMemMap, alloc_handle->ptr,
+                              alloc_handle->length, 0,
+                              alloc_handle->generic_handle, 0);
     if (status != UCS_OK) {
         goto err_address_free;
     }
@@ -253,18 +249,15 @@ uct_cuda_copy_mem_alloc_fabric(uct_cuda_copy_md_t *md,
     access_desc.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
     access_desc.location.id   = cu_device;
 
-    status = UCT_CUDADRV_FUNC(cuMemSetAccess(
-                     alloc_handle->ptr, alloc_handle->length, &access_desc, 1),
-                     log_level);
+    status = UCT_CUDADRV_FUNC(log_level, cuMemSetAccess, alloc_handle->ptr,
+                              alloc_handle->length, &access_desc, 1);
     if (status != UCS_OK) {
         goto err_mem_unmap;
     }
 
-    status = UCT_CUDADRV_FUNC(
-            cuPointerGetAttribute(&allowed_types,
-                                  CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES,
-                                  alloc_handle->ptr),
-            log_level);
+    status = UCT_CUDADRV_FUNC(log_level, cuPointerGetAttribute, &allowed_types,
+                              CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES,
+                              alloc_handle->ptr);
     if (status != UCS_OK) {
         goto err_mem_unmap;
     } else if (!(allowed_types & CU_MEM_HANDLE_TYPE_FABRIC)) {
@@ -282,13 +275,13 @@ uct_cuda_copy_mem_alloc_fabric(uct_cuda_copy_md_t *md,
     return UCS_OK;
 
 err_mem_unmap:
-    UCT_CUDADRV_FUNC_LOG_DEBUG(
-            cuMemUnmap(alloc_handle->ptr, alloc_handle->length));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemUnmap, alloc_handle->ptr,
+                                    alloc_handle->length);
 err_address_free:
-    UCT_CUDADRV_FUNC_LOG_DEBUG(
-            cuMemAddressFree(alloc_handle->ptr, alloc_handle->length));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemAddressFree, alloc_handle->ptr,
+                                    alloc_handle->length);
 err_mem_release:
-    UCT_CUDADRV_FUNC_LOG_DEBUG(cuMemRelease(alloc_handle->generic_handle));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemRelease, alloc_handle->generic_handle);
 #endif
     return UCS_ERR_NO_MEMORY;
 }
@@ -316,8 +309,8 @@ static ucs_status_t uct_cuda_copy_set_ctx_sync_memops(int log_level)
 
     if (cuda_cuCtxSetFlags_func != NULL) {
         /* Synchronize future DMA operations for all memory types */
-        UCT_CUDADRV_FUNC(cuda_cuCtxSetFlags_func(CU_CTX_SYNC_MEMOPS),
-                         log_level);
+        (void)UCT_CUDADRV_FUNC(log_level, cuda_cuCtxSetFlags_func,
+                               CU_CTX_SYNC_MEMOPS);
         return UCS_OK;
     }
 #endif
@@ -339,9 +332,8 @@ static void uct_cuda_copy_sync_memops(CUdeviceptr dptr, int is_vmm)
     }
 
     /* Synchronize for DMA for legacy memory types */
-    UCT_CUDADRV_FUNC_LOG_WARN(
-            cuPointerSetAttribute(&sync_memops_value,
-                                  CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, dptr));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuPointerSetAttribute, &sync_memops_value,
+                                    CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, dptr);
 }
 
 static ucs_status_t
@@ -396,9 +388,8 @@ uct_cuda_copy_mem_alloc(uct_md_h uct_md, size_t *length_p, void **address_p,
         }
 
         if (md->config.enable_fabric != UCS_YES) {
-            status = UCT_CUDADRV_FUNC(cuMemAlloc(&alloc_handle->ptr,
-                                                 alloc_handle->length),
-                                      log_level);
+            status = UCT_CUDADRV_FUNC(log_level, cuMemAlloc, &alloc_handle->ptr,
+                                      alloc_handle->length);
             if (status == UCS_OK) {
                 goto allocated;
             }
@@ -408,9 +399,9 @@ uct_cuda_copy_mem_alloc(uct_md_h uct_md, size_t *length_p, void **address_p,
                 alloc_handle->length);
         status = UCS_ERR_NO_MEMORY;
     } else if (mem_type == UCS_MEMORY_TYPE_CUDA_MANAGED) {
-        status = UCT_CUDADRV_FUNC(
-                cuMemAllocManaged(&alloc_handle->ptr, alloc_handle->length,
-                                  CU_MEM_ATTACH_GLOBAL), log_level);
+        status = UCT_CUDADRV_FUNC(log_level, cuMemAllocManaged,
+                                  &alloc_handle->ptr, alloc_handle->length,
+                                  CU_MEM_ATTACH_GLOBAL);
     } else {
         ucs_log(log_level,
                 "allocation mem_types supported: cuda, cuda-managed");
@@ -443,20 +434,20 @@ uct_cuda_copy_mem_release_fabric(uct_cuda_copy_alloc_handle_t *alloc_handle)
 #if HAVE_CUDA_FABRIC
     ucs_status_t status;
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(
-            cuMemRelease(alloc_handle->generic_handle));
+    status = UCT_CUDADRV_FUNC_LOG_WARN(cuMemRelease,
+                                       alloc_handle->generic_handle);
     if (status != UCS_OK) {
         return status;
     }
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(
-            cuMemUnmap(alloc_handle->ptr, alloc_handle->length));
+    status = UCT_CUDADRV_FUNC_LOG_WARN(cuMemUnmap, alloc_handle->ptr,
+                                       alloc_handle->length);
     if (status != UCS_OK) {
         return status;
     }
 
-    return UCT_CUDADRV_FUNC_LOG_ERR(
-            cuMemAddressFree(alloc_handle->ptr, alloc_handle->length));
+    return UCT_CUDADRV_FUNC_LOG_WARN(cuMemAddressFree, alloc_handle->ptr,
+                                     alloc_handle->length);
 #else
     return UCS_ERR_UNSUPPORTED;
 #endif
@@ -482,8 +473,8 @@ static int uct_cuda_copy_detect_vmm(const void *address,
     *vmm_mem_type = UCS_MEMORY_TYPE_UNKNOWN;
     *cuda_device  = CU_DEVICE_INVALID;
 
-    status = UCT_CUDADRV_FUNC_LOG_DEBUG(
-            cuMemGetAllocationPropertiesFromHandle(&prop, alloc_handle));
+    status = UCT_CUDADRV_FUNC_LOG_DEBUG(cuMemGetAllocationPropertiesFromHandle,
+                                        &prop, alloc_handle);
     if (status != UCS_OK) {
         goto out;
     }
@@ -502,7 +493,7 @@ static int uct_cuda_copy_detect_vmm(const void *address,
     }
 
 out:
-    UCT_CUDADRV_FUNC_LOG_WARN(cuMemRelease(alloc_handle));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemRelease, alloc_handle);
     return 1;
 #else
     return 0;
@@ -518,7 +509,7 @@ static ucs_status_t uct_cuda_copy_mem_free(uct_md_h md, uct_mem_h memh)
     if (alloc_handle->is_vmm) {
         status = uct_cuda_copy_mem_release_fabric(alloc_handle);
     } else {
-        UCT_CUDADRV_FUNC(cuMemFree(alloc_handle->ptr), UCS_LOG_LEVEL_DIAG);
+        (void)UCT_CUDADRV_FUNC_LOG_DIAG(cuMemFree, alloc_handle->ptr);
         status = UCS_OK;
     }
 
@@ -544,13 +535,14 @@ static size_t uct_cuda_copy_md_get_total_device_mem(CUdevice cuda_device)
     pthread_mutex_lock(&lock);
 
     if (!total_bytes[cuda_device]) {
-        if (UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceTotalMem(&total_bytes[cuda_device],
-                                                      cuda_device)) != UCS_OK) {
+        if (UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceTotalMem,
+                                     &total_bytes[cuda_device],
+                                     cuda_device) != UCS_OK) {
             goto err;
         }
 
-        if (UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetName(dev_name, sizeof(dev_name),
-                                                     cuda_device)) != UCS_OK) {
+        if (UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetName, dev_name,
+                                     sizeof(dev_name), cuda_device) != UCS_OK) {
             goto err;
         }
 
@@ -585,7 +577,7 @@ static void uct_cuda_copy_md_sync_memops_get_address_range(
     if (cuda_ctx == NULL) {
         status = uct_cuda_ctx_primary_push(cuda_device, 0, UCS_LOG_LEVEL_ERROR);
     } else {
-        status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent(cuda_ctx));
+        status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent, cuda_ctx);
     }
     if (status != UCS_OK) {
         return;
@@ -600,9 +592,8 @@ static void uct_cuda_copy_md_sync_memops_get_address_range(
         goto out_ctx_pop;
     }
 
-    if (UCT_CUDADRV_FUNC_LOG_DEBUG(
-                cuMemGetAddressRange(&base_address, &alloc_length, address)) !=
-        UCS_OK) {
+    if (UCT_CUDADRV_FUNC_LOG_DEBUG(cuMemGetAddressRange, &base_address,
+                                   &alloc_length, address) != UCS_OK) {
         goto out_ctx_pop;
     }
 
@@ -622,9 +613,9 @@ static void uct_cuda_copy_md_sync_memops_get_address_range(
     mem_info->alloc_length = alloc_length;
 
 out_ctx_pop:
-    UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(&tmp_ctx));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, &tmp_ctx);
     if (cuda_ctx == NULL) {
-        UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(cuda_device));
+        (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, cuda_device);
     }
 }
 
@@ -660,10 +651,10 @@ uct_cuda_copy_md_query_attributes(const uct_cuda_copy_md_t *md,
         attr_type[3] = CU_POINTER_ATTRIBUTE_CONTEXT;
         attr_data[3] = &cuda_mem_ctx;
 
-        status = UCT_CUDADRV_FUNC_LOG_ERR(
-                cuPointerGetAttributes(ucs_static_array_size(attr_data),
-                                       attr_type, attr_data,
-                                       (CUdeviceptr)address));
+        status = UCT_CUDADRV_FUNC_LOG_ERR(cuPointerGetAttributes,
+                                          ucs_static_array_size(attr_data),
+                                          attr_type, attr_data,
+                                          (CUdeviceptr)address);
         if (status != UCS_OK) {
             /* pointer not recognized */
             return status;
diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc.inl b/src/uct/cuda/cuda_ipc/cuda_ipc.inl
index 0fa6c3beec6..171d0eaeeef 100644
--- a/src/uct/cuda/cuda_ipc/cuda_ipc.inl
+++ b/src/uct/cuda/cuda_ipc/cuda_ipc.inl
@@ -29,9 +29,9 @@ uct_cuda_ipc_check_and_push_ctx(CUdeviceptr address, CUdevice *cuda_device_p,
     attr_type[1] = CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL;
     attr_data[1] = &cuda_device_ordinal;
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(
-            cuPointerGetAttributes(UCT_CUDA_IPC_NUM_ATTRS, attr_type, attr_data,
-                                   address));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuPointerGetAttributes,
+                                      UCT_CUDA_IPC_NUM_ATTRS, attr_type,
+                                      attr_data, address);
     if (ucs_unlikely(status != UCS_OK)) {
         return status;
     }
@@ -39,8 +39,8 @@ uct_cuda_ipc_check_and_push_ctx(CUdeviceptr address, CUdevice *cuda_device_p,
     ucs_assertv(cuda_device_ordinal >= 0, "cuda_device_ordinal=%d",
                 cuda_device_ordinal);
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet(&cuda_device,
-                                                  cuda_device_ordinal));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet, &cuda_device,
+                                      cuda_device_ordinal);
     if (ucs_unlikely(status != UCS_OK)) {
         return status;
     }
@@ -51,16 +51,16 @@ uct_cuda_ipc_check_and_push_ctx(CUdeviceptr address, CUdevice *cuda_device_p,
            return status;
         }
 
-        UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(cuda_device));
+        (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, cuda_device);
     }
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxGetCurrent(&cuda_ctx_current));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxGetCurrent, &cuda_ctx_current);
     if (ucs_unlikely(status != UCS_OK)) {
         return status;
     }
 
     if (cuda_ctx != cuda_ctx_current) {
-        status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent(cuda_ctx));
+        status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent, cuda_ctx);
         if (ucs_unlikely(status != UCS_OK)) {
             return status;
         }
@@ -78,7 +78,7 @@ static UCS_F_ALWAYS_INLINE void
 uct_cuda_ipc_check_and_pop_ctx(int is_ctx_pushed)
 {
     if (is_ctx_pushed) {
-        UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL));
+        (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL);
     }
 }
 
diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c b/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c
index f1484418abe..cb6f9dedbee 100644
--- a/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c
+++ b/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c
@@ -140,9 +140,9 @@ uct_cuda_ipc_primary_ctx_retain_and_push(CUdevice cuda_device)
         return status;
     }
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent(cuda_ctx));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent, cuda_ctx);
     if (status != UCS_OK) {
-        UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(cuda_device));
+        (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, cuda_device);
     }
 
     return status;
@@ -150,8 +150,8 @@ uct_cuda_ipc_primary_ctx_retain_and_push(CUdevice cuda_device)
 
 static void uct_cuda_ipc_primary_ctx_pop_and_release(CUdevice cuda_device)
 {
-    UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL));
-    UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(cuda_device));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL);
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, cuda_device);
 }
 
 static ucs_status_t
@@ -164,8 +164,8 @@ uct_cuda_ipc_close_memhandle_legacy(uct_cuda_ipc_cache_region_t *region)
         return status;
     }
 
-    status = UCT_CUDADRV_FUNC_LOG_WARN(
-            cuIpcCloseMemHandle((CUdeviceptr)region->mapped_addr));
+    status = UCT_CUDADRV_FUNC_LOG_WARN(cuIpcCloseMemHandle,
+                                       (CUdeviceptr)region->mapped_addr);
     uct_cuda_ipc_primary_ctx_pop_and_release(region->cu_dev);
     return status;
 }
@@ -176,17 +176,19 @@ static ucs_status_t uct_cuda_ipc_close_memhandle(uct_cuda_ipc_cache_region_t *re
     ucs_status_t status;
 
     if (region->key.ph.handle_type == UCT_CUDA_IPC_KEY_HANDLE_TYPE_VMM) {
-        status = UCT_CUDADRV_FUNC_LOG_WARN(cuMemUnmap(
-                    (CUdeviceptr)region->mapped_addr, region->key.b_len));
+        status = UCT_CUDADRV_FUNC_LOG_WARN(cuMemUnmap,
+                                           (CUdeviceptr)region->mapped_addr,
+                                           region->key.b_len);
         if (status != UCS_OK) {
             return status;
         }
 
-        return UCT_CUDADRV_FUNC_LOG_WARN(cuMemAddressFree(
-                (CUdeviceptr)region->mapped_addr, region->key.b_len));
+        return UCT_CUDADRV_FUNC_LOG_WARN(cuMemAddressFree,
+                                         (CUdeviceptr)region->mapped_addr,
+                                         region->key.b_len);
     } else if (region->key.ph.handle_type == UCT_CUDA_IPC_KEY_HANDLE_TYPE_MEMPOOL) {
-        return UCT_CUDADRV_FUNC_LOG_WARN(
-                cuMemFree((CUdeviceptr)region->mapped_addr));
+        return UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree,
+                                         (CUdeviceptr)region->mapped_addr);
     }
 #endif
     return uct_cuda_ipc_close_memhandle_legacy(region);
@@ -281,8 +283,8 @@ uct_cuda_ipc_open_memhandle_legacy(CUipcMemHandle memh, CUdevice cu_dev,
         return status;
     }
 
-    cuerr = cuIpcOpenMemHandle(mapped_addr, memh,
-                               CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS);
+    cuerr = UCS_PROFILE_CALL_ALWAYS(cuIpcOpenMemHandle, mapped_addr, memh,
+                                    CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS);
     if (cuerr != CUDA_SUCCESS) {
         ucs_log(log_level, "cuIpcOpenMemHandle() failed: %s",
                 uct_cuda_cu_get_error_string(cuerr));
@@ -313,29 +315,29 @@ uct_cuda_ipc_open_memhandle_vmm(const uct_cuda_ipc_rkey_t *key, CUdevice cu_dev,
     CUdeviceptr dptr;
     CUmemGenericAllocationHandle handle;
 
-    status = UCT_CUDADRV_FUNC(cuMemImportFromShareableHandle(&handle,
-                (void*)&key->ph.handle.fabric_handle,
-                CU_MEM_HANDLE_TYPE_FABRIC), log_level);
+    status = UCT_CUDADRV_FUNC(log_level, cuMemImportFromShareableHandle,
+                              &handle, (void*)&key->ph.handle.fabric_handle,
+                              CU_MEM_HANDLE_TYPE_FABRIC);
     if (status != UCS_OK) {
         goto out;
     }
 
-    status = UCT_CUDADRV_FUNC(cuMemAddressReserve(&dptr, key->b_len, 0, 0, 0),
-                              log_level);
+    status = UCT_CUDADRV_FUNC(log_level, cuMemAddressReserve, &dptr, key->b_len,
+                              0, 0, 0);
     if (status != UCS_OK) {
         goto release_handle;
     }
 
-    status = UCT_CUDADRV_FUNC(cuMemMap(dptr, key->b_len, 0, handle, 0),
-                              log_level);
+    status = UCT_CUDADRV_FUNC(log_level, cuMemMap, dptr, key->b_len, 0, handle,
+                              0);
     if (status != UCS_OK) {
         goto release_va_range;
     }
 
     uct_cuda_ipc_init_access_desc(&access_desc, cu_dev);
 
-    status = UCT_CUDADRV_FUNC(cuMemSetAccess(dptr, key->b_len, &access_desc, 1),
-                              log_level);
+    status = UCT_CUDADRV_FUNC(log_level, cuMemSetAccess, dptr, key->b_len,
+                              &access_desc, 1);
     if (status != UCS_OK) {
         goto unmap_range;
     }
@@ -344,11 +346,11 @@ uct_cuda_ipc_open_memhandle_vmm(const uct_cuda_ipc_rkey_t *key, CUdevice cu_dev,
     goto release_handle;
 
 unmap_range:
-    UCT_CUDADRV_FUNC_LOG_WARN(cuMemUnmap(dptr, key->b_len));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemUnmap, dptr, key->b_len);
 release_va_range:
-    UCT_CUDADRV_FUNC_LOG_WARN(cuMemAddressFree(dptr, key->b_len));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemAddressFree, dptr, key->b_len);
 release_handle:
-    UCT_CUDADRV_FUNC_LOG_WARN(cuMemRelease(handle));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemRelease, handle);
 out:
     return status;
 }
@@ -362,23 +364,23 @@ static ucs_status_t cuda_ipc_rem_mpool_cache_create(uct_cuda_ipc_rkey_t *key,
     CUdeviceptr dptr;
     ucs_status_t status;
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemPoolImportFromShareableHandle(
-                mpool, (void *)&key->ph.handle.fabric_handle,
-                CU_MEM_HANDLE_TYPE_FABRIC, 0));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemPoolImportFromShareableHandle, mpool,
+                                      (void*)&key->ph.handle.fabric_handle,
+                                      CU_MEM_HANDLE_TYPE_FABRIC, 0);
     if (status != UCS_OK) {
         goto err;
     }
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemPoolImportPointer(&dptr, *mpool,
-                (CUmemPoolPtrExportData*)&key->ph.ptr));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemPoolImportPointer, &dptr, *mpool,
+                                      (CUmemPoolPtrExportData*)&key->ph.ptr);
     if (status != UCS_OK) {
         goto err_free_mpool;
     }
 
     uct_cuda_ipc_init_access_desc(&access_desc, cu_dev);
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(
-                cuMemPoolSetAccess(*mpool, &access_desc, 1));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemPoolSetAccess, *mpool, &access_desc,
+                                      1);
     if (status != UCS_OK) {
         goto err_free_ptr;
     }
@@ -387,9 +389,9 @@ static ucs_status_t cuda_ipc_rem_mpool_cache_create(uct_cuda_ipc_rkey_t *key,
     return UCS_OK;
 
 err_free_ptr:
-    cuMemFree(dptr);
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree, dptr);
 err_free_mpool:
-    cuMemPoolDestroy(*mpool);
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemPoolDestroy, *mpool);
 err:
     return status;
 }
@@ -441,8 +443,9 @@ uct_cuda_ipc_open_memhandle_mempool(uct_cuda_ipc_rkey_t *key, CUdevice cu_dev,
     }
 
 out_import_pointer:
-    status = UCT_CUDADRV_FUNC(cuMemPoolImportPointer(mapped_addr, key->ph.pool,
-            (CUmemPoolPtrExportData*)&key->ph.ptr), log_level);
+    status = UCT_CUDADRV_FUNC(log_level, cuMemPoolImportPointer, mapped_addr,
+                              key->ph.pool,
+                              (CUmemPoolPtrExportData*)&key->ph.ptr);
 
 err:
     pthread_rwlock_unlock(&uct_cuda_ipc_rem_mpool_cache.lock);
@@ -610,7 +613,7 @@ UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_ipc_map_memhandle,
     CUuuid uuid;
     int ret;
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetUuid(&uuid, cu_dev));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetUuid, &uuid, cu_dev);
     if (status != UCS_OK) {
         return status;
     }
diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_ep.c b/src/uct/cuda/cuda_ipc/cuda_ipc_ep.c
index e8b62347d1e..82b993e6876 100644
--- a/src/uct/cuda/cuda_ipc/cuda_ipc_ep.c
+++ b/src/uct/cuda/cuda_ipc/cuda_ipc_ep.c
@@ -1,11 +1,11 @@
 /**
- * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2018-2019. ALL RIGHTS RESERVED.
+ * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2018-2026. ALL RIGHTS RESERVED.
  * See file LICENSE for terms.
  */
 
- #ifdef HAVE_CONFIG_H
- #  include "config.h"
- #endif
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
 
 #include <uct/cuda/base/cuda_iface.h>
 #include <uct/api/uct_def.h>
@@ -43,7 +43,8 @@ static UCS_CLASS_INIT_FUNC(uct_cuda_ipc_ep_t, const uct_ep_params_t *params)
 static UCS_CLASS_CLEANUP_FUNC(uct_cuda_ipc_ep_t)
 {
     if (self->device_ep != NULL) {
-        (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree((CUdeviceptr)self->device_ep));
+        (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree,
+                                        (CUdeviceptr)self->device_ep);
     }
 }
 
@@ -76,7 +77,7 @@ static UCS_F_ALWAYS_INLINE ucs_status_t uct_cuda_ipc_ctx_rsc_get(
 
     result = uct_cuda_ctx_get_id(NULL, &ctx_id);
     if (ucs_unlikely(result != CUDA_SUCCESS)) {
-        UCT_CUDADRV_LOG(cuCtxGetId, UCS_LOG_LEVEL_ERROR, result);
+        UCT_CUDADRV_LOG(UCS_LOG_LEVEL_ERROR, cuCtxGetId, result);
         return UCS_ERR_IO_ERROR;
     }
 
@@ -159,15 +160,15 @@ uct_cuda_ipc_post_cuda_async_copy(uct_ep_h tl_ep, uint64_t remote_addr,
     src = (CUdeviceptr)
         ((direction == UCT_CUDA_IPC_PUT) ? iov[0].buffer : mapped_rem_addr);
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemcpyDtoDAsync(dst, src, iov[0].length,
-                                                        *stream));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemcpyDtoDAsync, dst, src,
+                                      iov[0].length, *stream);
     if (UCS_OK != status) {
         ucs_mpool_put(cuda_ipc_event);
         goto out;
     }
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuEventRecord(cuda_ipc_event->super.event,
-                                                    *stream));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuEventRecord,
+                                      cuda_ipc_event->super.event, *stream);
     if (UCS_OK != status) {
         ucs_mpool_put(cuda_ipc_event);
         goto out;
@@ -247,14 +248,14 @@ ucs_status_t uct_cuda_ipc_ep_get_device_ep(uct_ep_h tl_ep,
     }
 
     device_ep.uct_tl_id = UCT_DEVICE_TL_CUDA_IPC;
-    status = UCT_CUDADRV_FUNC_LOG_ERR(
-            cuMemAlloc((CUdeviceptr *)&ep->device_ep, sizeof(uct_device_ep_t)));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemAlloc, (CUdeviceptr*)&ep->device_ep,
+                                      sizeof(uct_device_ep_t));
     if (status != UCS_OK) {
         goto err;
     }
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(
-            cuMemcpyHtoD((CUdeviceptr)ep->device_ep, &device_ep, sizeof(uct_device_ep_t)));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemcpyHtoD, (CUdeviceptr)ep->device_ep,
+                                      &device_ep, sizeof(uct_device_ep_t));
     if (status != UCS_OK) {
         goto err_free_mem;
     }
@@ -263,7 +264,7 @@ ucs_status_t uct_cuda_ipc_ep_get_device_ep(uct_ep_h tl_ep,
     *device_ep_p = ep->device_ep;
     return UCS_OK;
 err_free_mem:
-    cuMemFree((CUdeviceptr)ep->device_ep);
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree, (CUdeviceptr)ep->device_ep);
     ep->device_ep = NULL;
 err:
     return status;
diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_iface.c b/src/uct/cuda/cuda_ipc/cuda_ipc_iface.c
index ef41ec5263c..884143fdfa1 100644
--- a/src/uct/cuda/cuda_ipc/cuda_ipc_iface.c
+++ b/src/uct/cuda/cuda_ipc/cuda_ipc_iface.c
@@ -1,5 +1,5 @@
 /**
- * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2018-2019. ALL RIGHTS RESERVED.
+ * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2018-2026. ALL RIGHTS RESERVED.
  * See file LICENSE for terms.
  */
 
@@ -157,15 +157,14 @@ static double uct_cuda_ipc_iface_get_bw()
     int major_version;
     ucs_status_t status;
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet(&cu_device, 0));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet, &cu_device, 0);
     if (status != UCS_OK) {
         return 0;
     }
 
     status = UCT_CUDADRV_FUNC_LOG_ERR(
-            cuDeviceGetAttribute(&major_version,
-                                 CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
-                                 cu_device));
+            cuDeviceGetAttribute, &major_version,
+            CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cu_device);
     if (status != UCS_OK) {
         return 0;
     }
diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_md.c b/src/uct/cuda/cuda_ipc/cuda_ipc_md.c
index 3f6173def7d..662cb3fd8a3 100644
--- a/src/uct/cuda/cuda_ipc/cuda_ipc_md.c
+++ b/src/uct/cuda/cuda_ipc/cuda_ipc_md.c
@@ -63,7 +63,7 @@ static uct_cuda_ipc_dev_cache_t *uct_cuda_ipc_create_dev_cache(int dev_num)
     ucs_status_t status;
     int i, num_devices;
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetCount(&num_devices));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetCount, &num_devices);
     if (UCS_OK != status) {
         return NULL;
     }
@@ -163,15 +163,15 @@ uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh,
         goto out;
     }
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(
-            cuMemGetAddressRange(&key->d_bptr, &key->b_len, (CUdeviceptr)addr));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemGetAddressRange, &key->d_bptr,
+                                      &key->b_len, (CUdeviceptr)addr);
     if (status != UCS_OK) {
         goto out_pop_ctx;
     }
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuPointerGetAttribute(&key->ph.buffer_id,
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuPointerGetAttribute, &key->ph.buffer_id,
                                       CU_POINTER_ATTRIBUTE_BUFFER_ID,
-                                      (CUdeviceptr)addr));
+                                      (CUdeviceptr)addr);
     if (status != UCS_OK) {
         goto out_pop_ctx;
     }
@@ -187,9 +187,9 @@ uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh,
     attr_type[2] = CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE;
     attr_data[2] = &mempool;
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(
-            cuPointerGetAttributes(ucs_static_array_size(attr_data), attr_type,
-                attr_data, (CUdeviceptr)addr));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuPointerGetAttributes,
+                                      ucs_static_array_size(attr_data),
+                                      attr_type, attr_data, (CUdeviceptr)addr);
     if (status != UCS_OK) {
         goto out_pop_ctx;
     }
@@ -202,15 +202,13 @@ uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh,
         goto non_ipc;
     }
 
-    status =
-        UCT_CUDADRV_FUNC(cuMemRetainAllocationHandle(&handle, addr),
-                UCS_LOG_LEVEL_DIAG);
+    status = UCT_CUDADRV_FUNC_LOG_DIAG(cuMemRetainAllocationHandle, &handle,
+                                       addr);
     if (status == UCS_OK) {
-        status =
-            UCT_CUDADRV_FUNC_LOG_ERR(cuMemExportToShareableHandle(
-                        &key->ph.handle.fabric_handle, handle,
-                        CU_MEM_HANDLE_TYPE_FABRIC, 0));
-        UCT_CUDADRV_FUNC_LOG_WARN(cuMemRelease(handle));
+        status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemExportToShareableHandle,
+                                          &key->ph.handle.fabric_handle, handle,
+                                          CU_MEM_HANDLE_TYPE_FABRIC, 0);
+        (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemRelease, handle);
         if (status != UCS_OK) {
             ucs_debug("unable to export handle for VMM ptr: %p", addr);
             goto non_ipc;
@@ -229,16 +227,16 @@ uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh,
         goto out_pop_ctx;
     }
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemPoolExportToShareableHandle(
-                (void *)&key->ph.handle.fabric_handle, mempool,
-                CU_MEM_HANDLE_TYPE_FABRIC, 0));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemPoolExportToShareableHandle,
+                                      (void*)&key->ph.handle.fabric_handle,
+                                      mempool, CU_MEM_HANDLE_TYPE_FABRIC, 0);
     if (status != UCS_OK) {
         ucs_debug("unable to export handle for mempool ptr: %p", addr);
         goto non_ipc;
     }
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemPoolExportPointer(&key->ph.ptr,
-                (CUdeviceptr)key->d_bptr));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemPoolExportPointer, &key->ph.ptr,
+                                      (CUdeviceptr)key->d_bptr);
     if (status != UCS_OK) {
         goto out_pop_ctx;
     }
@@ -253,8 +251,8 @@ uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh,
 #endif
 legacy_path:
     key->ph.handle_type = UCT_CUDA_IPC_KEY_HANDLE_TYPE_LEGACY;
-    status              = UCT_CUDADRV_FUNC_LOG_ERR(
-            cuIpcGetMemHandle(&key->ph.handle.legacy, (CUdeviceptr)addr));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuIpcGetMemHandle, &key->ph.handle.legacy,
+                                      (CUdeviceptr)addr);
     if (status != UCS_OK) {
         goto out_pop_ctx;
     }
@@ -323,8 +321,8 @@ uct_cuda_ipc_mkey_pack(uct_md_h md, uct_mem_h tl_memh, void *address,
         packed->pid |= UCT_CUDA_IPC_RKEY_FLAG_PID_NS;
     }
 
-    return UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetUuid(&packed->uuid,
-                                                    memh->dev_num));
+    return UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetUuid, &packed->uuid,
+                                    memh->dev_num);
 }
 
 static ucs_status_t
@@ -570,7 +568,7 @@ uct_cuda_ipc_md_mem_elem_pack(uct_md_h md, uct_mem_h memh, uct_rkey_t rkey,
     CUdevice cuda_device;
     void *mapped_addr;
 
-    if (UCT_CUDADRV_FUNC_LOG_DEBUG(cuCtxGetDevice(&cuda_device)) != UCS_OK) {
+    if (UCT_CUDADRV_FUNC_LOG_DEBUG(cuCtxGetDevice, &cuda_device) != UCS_OK) {
         return UCS_ERR_UNREACHABLE;
     }
 
@@ -632,7 +630,7 @@ ucs_status_t uct_cuda_ipc_rkey_ptr(uct_component_t *component, uct_rkey_t rkey,
     CUdevice cu_dev;
     ucs_status_t status;
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxGetDevice(&cu_dev));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxGetDevice, &cu_dev);
     if (ucs_unlikely(status != UCS_OK)) {
         return status;
     }
diff --git a/src/uct/ib/mlx5/gdaki/gdaki.c b/src/uct/ib/mlx5/gdaki/gdaki.c
index c243327b682..e4320fa9f11 100644
--- a/src/uct/ib/mlx5/gdaki/gdaki.c
+++ b/src/uct/ib/mlx5/gdaki/gdaki.c
@@ -87,15 +87,15 @@ uct_rc_gdaki_alloc(size_t size, size_t align, void **p_buf, CUdeviceptr *p_orig)
     unsigned int flag = 1;
     ucs_status_t status;
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemAlloc(p_orig, size + align - 1));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemAlloc, p_orig, size + align - 1);
     if (status != UCS_OK) {
         return status;
     }
 
     *p_buf = (void*)ucs_align_up_pow2_ptr(*p_orig, align);
-    status = UCT_CUDADRV_FUNC_LOG_ERR(
-            cuPointerSetAttribute(&flag, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
-                                  (CUdeviceptr)*p_buf));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuPointerSetAttribute, &flag,
+                                      CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
+                                      (CUdeviceptr)*p_buf);
     if (status != UCS_OK) {
         goto err;
     }
@@ -103,7 +103,7 @@ uct_rc_gdaki_alloc(size_t size, size_t align, void **p_buf, CUdeviceptr *p_orig)
     return UCS_OK;
 
 err:
-    cuMemFree(*p_orig);
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree, *p_orig);
     return status;
 }
 
@@ -134,7 +134,7 @@ static int uct_gdaki_check_umem_dmabuf(const uct_ib_md_t *md)
     uct_cuda_copy_md_dmabuf_t dmabuf;
     CUdeviceptr buff;
 
-    if (UCT_CUDADRV_FUNC_LOG_ERR(cuMemAlloc(&buff, 1)) != UCS_OK) {
+    if (UCT_CUDADRV_FUNC_LOG_ERR(cuMemAlloc, &buff, 1) != UCS_OK) {
         goto out;
     }
 
@@ -162,7 +162,7 @@ static int uct_gdaki_check_umem_dmabuf(const uct_ib_md_t *md)
 out_close:
     ucs_close_fd(&dmabuf.fd);
 out_free:
-    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree(buff));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree, buff);
 out:
 #endif
     return ret;
@@ -245,7 +245,7 @@ uct_rc_gdaki_init_umem(uct_rc_gdaki_iface_t *iface, uint64_t pgsz_bitmap,
                                            uct_ib_md_t);
     ucs_status_t status;
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent(iface->cuda_ctx));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent, iface->cuda_ctx);
     if (status != UCS_OK) {
         return status;
     }
@@ -266,15 +266,15 @@ uct_rc_gdaki_init_umem(uct_rc_gdaki_iface_t *iface, uint64_t pgsz_bitmap,
         goto err_umem;
     }
 
-    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL);
     return UCS_OK;
 
 err_umem:
-    cuMemFree(mem->gpu_raw);
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree, mem->gpu_raw);
     mem->gpu_mem = NULL;
     mem->gpu_raw = 0;
 out_ctx:
-    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL);
     return status;
 }
 
@@ -441,7 +441,7 @@ uct_rc_gdaki_pool_chunk_alloc(ucs_mpool_t *mp, size_t *size_p, void **chunk_p)
 
 err_umem:
     mlx5dv_devx_umem_dereg(hdr->umem);
-    cuMemFree(hdr->gpu_raw);
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree, hdr->gpu_raw);
 err_free_hdr:
     ucs_free(hdr);
     return status;
@@ -461,7 +461,7 @@ static void uct_rc_gdaki_pool_chunk_release(ucs_mpool_t *mp, void *chunk)
                                         mp_chunk->num_elems,
                                         iface->num_channels - 1);
     mlx5dv_devx_umem_dereg(hdr->umem);
-    cuMemFree(hdr->gpu_raw);
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree, hdr->gpu_raw);
     ucs_free(hdr);
 }
 
@@ -591,7 +591,7 @@ uct_rc_gdaki_ep_init_channels_direct(uct_rc_gdaki_iface_t *iface,
 
 err_umem:
     mlx5dv_devx_umem_dereg(ep->mem.umem);
-    cuMemFree(ep->mem.gpu_raw);
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree, ep->mem.gpu_raw);
 err_block:
     ucs_free(ep->channel_block);
     uct_rc_gdaki_ep_reset_channels(ep);
@@ -608,7 +608,7 @@ static void uct_rc_gdaki_cleanup_channels_direct(uct_rc_gdaki_iface_t *iface,
     uct_rc_gdaki_chunk_channels_destroy(iface, NULL, ep->channel_block, 1, 1,
                                         iface->num_channels - 1);
     mlx5dv_devx_umem_dereg(ep->mem.umem);
-    cuMemFree(ep->mem.gpu_raw);
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree, ep->mem.gpu_raw);
     ucs_free(ep->channel_block);
     uct_rc_gdaki_ep_reset_channels(ep);
 }
@@ -656,7 +656,8 @@ static UCS_CLASS_CLEANUP_FUNC(uct_rc_gdaki_ep_t)
     if (self->dev_ep_init) {
         uct_rc_gdaki_channel_t *channels = self->channel_block->channels;
         for (i = 0; i < iface->num_channels; i++) {
-            (void)cuMemHostUnregister(channels[i].qp.reg->addr.ptr);
+            (void)UCS_PROFILE_CALL_ALWAYS(cuMemHostUnregister,
+                                          channels[i].qp.reg->addr.ptr);
         }
     }
     uct_rc_gdaki_ep_cleanup_channels(iface, self);
@@ -832,7 +833,7 @@ uct_rc_gdaki_ep_get_device_ep(uct_ep_h tl_ep, uct_device_ep_h *device_ep_p)
     pthread_mutex_lock(&iface->ep_init_lock);
 
     if (!ep->dev_ep_init) {
-        status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent(iface->cuda_ctx));
+        status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent, iface->cuda_ctx);
         if (status != UCS_OK) {
             goto out_unlock;
         }
@@ -849,8 +850,8 @@ uct_rc_gdaki_ep_get_device_ep(uct_ep_h tl_ep, uct_device_ep_h *device_ep_p)
         }
 
         status = UCT_CUDADRV_FUNC_LOG_ERR(
-                cuMemsetD8((CUdeviceptr)ep->channel_block->gpu_ptr, 0,
-                           dev_ep_host_size));
+                cuMemsetD8, (CUdeviceptr)ep->channel_block->gpu_ptr, 0,
+                dev_ep_host_size);
         if (status != UCS_OK) {
             goto out_free;
         }
@@ -868,15 +869,15 @@ uct_rc_gdaki_ep_get_device_ep(uct_ep_h tl_ep, uct_device_ep_h *device_ep_p)
 
         for (i = 0; i < iface->num_channels; ++i) {
             channel = &ep->channel_block->channels[i];
-            (void)cuMemHostRegister(channel->qp.reg->addr.ptr,
-                                    UCT_IB_MLX5_BF_REG_SIZE * 2,
-                                    CU_MEMHOSTREGISTER_PORTABLE |
-                                    CU_MEMHOSTREGISTER_DEVICEMAP |
-                                    CU_MEMHOSTREGISTER_IOMEMORY);
-
-            status = UCT_CUDADRV_FUNC_LOG_ERR(
-                    cuMemHostGetDevicePointer(&sq_db, channel->qp.reg->addr.ptr,
-                                              0));
+            (void)UCS_PROFILE_CALL_ALWAYS(cuMemHostRegister,
+                                          channel->qp.reg->addr.ptr,
+                                          UCT_IB_MLX5_BF_REG_SIZE * 2,
+                                          CU_MEMHOSTREGISTER_PORTABLE |
+                                                  CU_MEMHOSTREGISTER_DEVICEMAP |
+                                                  CU_MEMHOSTREGISTER_IOMEMORY);
+
+            status = UCT_CUDADRV_FUNC_LOG_ERR(cuMemHostGetDevicePointer, &sq_db,
+                                              channel->qp.reg->addr.ptr, 0);
             if (status != UCS_OK) {
                 goto out_unreg;
             }
@@ -889,14 +890,14 @@ uct_rc_gdaki_ep_get_device_ep(uct_ep_h tl_ep, uct_device_ep_h *device_ep_p)
         }
 
         status = UCT_CUDADRV_FUNC_LOG_ERR(
-                cuMemcpyHtoD((CUdeviceptr)ep->channel_block->gpu_ptr, dev_ep,
-                             dev_ep_host_size));
+                cuMemcpyHtoD, (CUdeviceptr)ep->channel_block->gpu_ptr, dev_ep,
+                dev_ep_host_size);
         if (status != UCS_OK) {
             goto out_unreg;
         }
 
         ucs_free(dev_ep);
-        (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL));
+        (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL);
 
         ep->dev_ep_init = 1;
     }
@@ -907,13 +908,14 @@ uct_rc_gdaki_ep_get_device_ep(uct_ep_h tl_ep, uct_device_ep_h *device_ep_p)
 
 out_unreg:
     do {
-        (void)cuMemHostUnregister(
+        (void)UCS_PROFILE_CALL_ALWAYS(
+                cuMemHostUnregister,
                 ep->channel_block->channels[i].qp.reg->addr.ptr);
     } while (i-- > 0);
 out_free:
     ucs_free(dev_ep);
 out_ctx:
-    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL);
 out_unlock:
     pthread_mutex_unlock(&iface->ep_init_lock);
     return status;
@@ -1024,24 +1026,24 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_iface_t, uct_md_h tl_md,
     }
 
     cuda_id = atoi(gpu_name + UCT_DEVICE_CUDA_NAME_LEN);
-    status  = UCT_CUDADRV_FUNC_LOG_ERR(
-            cuDeviceGetPCIBusId(pci_addr, UCS_SYS_BDF_NAME_MAX, cuda_id));
+    status  = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetPCIBusId, pci_addr,
+                                       UCS_SYS_BDF_NAME_MAX, cuda_id);
     if (status != UCS_OK) {
         return status;
     }
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet(&self->cuda_dev, cuda_id));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet, &self->cuda_dev, cuda_id);
     if (status != UCS_OK) {
         return status;
     }
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(
-            cuDevicePrimaryCtxRetain(&self->cuda_ctx, self->cuda_dev));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuDevicePrimaryCtxRetain, &self->cuda_ctx,
+                                      self->cuda_dev);
     if (status != UCS_OK) {
         return status;
     }
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent(self->cuda_ctx));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent, self->cuda_ctx);
     if (status != UCS_OK) {
         goto err_ctx_release;
     }
@@ -1074,7 +1076,7 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_iface_t, uct_md_h tl_md,
         }
     }
 
-    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL);
     return UCS_OK;
 
 err_pool:
@@ -1084,9 +1086,9 @@ static UCS_CLASS_INIT_FUNC(uct_rc_gdaki_iface_t, uct_md_h tl_md,
 err_atomic_buff:
     ucs_free(self->atomic_buff);
 err_ctx:
-    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL);
 err_ctx_release:
-    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(self->cuda_dev));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, self->cuda_dev);
     return status;
 }
 
@@ -1095,12 +1097,12 @@ static UCS_CLASS_CLEANUP_FUNC(uct_rc_gdaki_iface_t)
     pthread_mutex_destroy(&self->ep_init_lock);
     ibv_dereg_mr(self->atomic_mr);
     ucs_free(self->atomic_buff);
-    (void)UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent(self->cuda_ctx));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPushCurrent, self->cuda_ctx);
     if (self->ep_alloc_mode == UCT_RC_GDAKI_EP_ALLOC_MODE_POOL) {
         uct_rc_gdaki_iface_cleanup_channel_pool(self);
     }
-    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL));
-    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(self->cuda_dev));
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL);
+    (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, self->cuda_dev);
 }
 
 UCS_CLASS_DEFINE(uct_rc_gdaki_iface_t, uct_rc_mlx5_iface_common_t);
@@ -1124,10 +1126,10 @@ static ucs_status_t uct_gdaki_md_check_uar(uct_ib_mlx5_md_t *md)
 
     flags  = CU_MEMHOSTREGISTER_PORTABLE | CU_MEMHOSTREGISTER_DEVICEMAP |
              CU_MEMHOSTREGISTER_IOMEMORY;
-    status = UCT_CUDADRV_FUNC_LOG_DEBUG(
-            cuMemHostRegister(uar->reg_addr, UCT_IB_MLX5_BF_REG_SIZE, flags));
+    status = UCT_CUDADRV_FUNC_LOG_DEBUG(cuMemHostRegister, uar->reg_addr,
+                                        UCT_IB_MLX5_BF_REG_SIZE, flags);
     if (status == UCS_OK) {
-        UCT_CUDADRV_FUNC_LOG_WARN(cuMemHostUnregister(uar->reg_addr));
+        (void)UCT_CUDADRV_FUNC_LOG_WARN(cuMemHostUnregister, uar->reg_addr);
     }
 
     mlx5dv_devx_free_uar(uar);
@@ -1270,7 +1272,7 @@ uct_gdaki_dev_matrix_init(const uct_ib_md_t *ib_md, size_t *dmat_length_p)
     }
 
     /* Get the number of CUDA devices */
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetCount(&cudadev_count));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGetCount, &cudadev_count);
     if (status != UCS_OK) {
         goto out;
     }
@@ -1287,8 +1289,8 @@ uct_gdaki_dev_matrix_init(const uct_ib_md_t *ib_md, size_t *dmat_length_p)
 
     /* Map each CUDA device to the best suited IB devices */
     for (cudadev_index = 0; cudadev_index < cudadev_count; cudadev_index++) {
-        status = UCT_CUDADRV_FUNC_LOG_ERR(
-                cuDeviceGet(&cuda_dev, cudadev_index));
+        status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet, &cuda_dev,
+                                          cudadev_index);
         if (status != UCS_OK) {
             goto out;
         }
@@ -1351,7 +1353,7 @@ static CUdevice uct_gdaki_push_primary_ctx(int retain_inactive_ctx)
         return CU_DEVICE_INVALID;
     }
 
-    status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet(&cuda_dev, 0));
+    status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet, &cuda_dev, 0);
     if (status != UCS_OK) {
         return CU_DEVICE_INVALID;
     }
@@ -1461,7 +1463,7 @@ uct_gdaki_query_tl_devices(uct_md_h tl_md,
 
     num_tl_devices = 0;
     ucs_for_each_bit(i, ibdesc->cuda_map) {
-        status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet(&device, i));
+        status = UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet, &device, i);
         if (status != UCS_OK) {
             goto err;
         }
diff --git a/test/gtest/uct/test_device.cc b/test/gtest/uct/test_device.cc
index 5dd8f7d650a..c2c770f34af 100644
--- a/test/gtest/uct/test_device.cc
+++ b/test/gtest/uct/test_device.cc
@@ -40,9 +40,9 @@ class test_device_cuda_ctx_guard {
 
     test_device_cuda_ctx_guard() : m_dev(0), m_ctx(NULL), m_is_active(false)
     {
-        (void)UCT_CUDADRV_FUNC_LOG_DEBUG(cuInit(0));
+        (void)UCT_CUDADRV_FUNC_LOG_DEBUG(cuInit, 0);
 
-        if (UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet(&m_dev, 0)) != UCS_OK) {
+        if (UCT_CUDADRV_FUNC_LOG_ERR(cuDeviceGet, &m_dev, 0) != UCS_OK) {
             return;
         }
 
@@ -65,13 +65,13 @@ class test_device_cuda_ctx_guard {
             return;
         }
 
-        if (UCT_CUDADRV_FUNC_LOG_ERR(cuDevicePrimaryCtxRetain(&m_ctx, m_dev)) !=
+        if (UCT_CUDADRV_FUNC_LOG_ERR(cuDevicePrimaryCtxRetain, &m_ctx, m_dev) !=
             UCS_OK) {
             return;
         }
 
-        if (UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent(m_ctx)) != UCS_OK) {
-            (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(m_dev));
+        if (UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent, m_ctx) != UCS_OK) {
+            (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, m_dev);
             return;
         }
 
@@ -84,8 +84,8 @@ class test_device_cuda_ctx_guard {
             return;
         }
 
-        (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL));
-        (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(m_dev));
+        (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL);
+        (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, m_dev);
         m_is_active = false;
     }
 };
@@ -115,11 +115,11 @@ class test_device : public uct_test {
             return;
         }
 
-        status = UCT_CUDADRV_FUNC_LOG_ERR(
-                cuDevicePrimaryCtxRetain(&ctx, m_cuda_dev));
+        status = UCT_CUDADRV_FUNC_LOG_ERR(cuDevicePrimaryCtxRetain, &ctx,
+                                          m_cuda_dev);
         ASSERT_UCS_OK(status);
 
-        status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent(ctx));
+        status = UCT_CUDADRV_FUNC_LOG_ERR(cuCtxPushCurrent, ctx);
         ASSERT_UCS_OK(status);
     }
 
@@ -131,8 +131,8 @@ class test_device : public uct_test {
             return;
         }
 
-        (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent(NULL));
-        (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease(m_cuda_dev));
+        (void)UCT_CUDADRV_FUNC_LOG_WARN(cuCtxPopCurrent, NULL);
+        (void)UCT_CUDADRV_FUNC_LOG_WARN(cuDevicePrimaryCtxRelease, m_cuda_dev);
     }
 
     entity *m_sender;