diff --git a/src/ucp/core/ucp_context.h b/src/ucp/core/ucp_context.h index 371ac68e380..6434f5070b0 100644 --- a/src/ucp/core/ucp_context.h +++ b/src/ucp/core/ucp_context.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -717,6 +718,13 @@ ucp_memory_detect_internal(ucp_context_h context, const void *address, status = ucs_memtype_cache_lookup(address, length, mem_info); if (ucs_likely(status == UCS_ERR_NO_ELEM)) { + if (ucs_unlikely(RUNNING_ON_VALGRIND)) { + ucs_trace_req("address %p length %zu: not found in memtype cache, " + "detecting memory type under Valgrind", address, length); + ucp_memory_detect_slowpath(context, address, length, mem_info); + return; + } + ucs_trace_req("address %p length %zu: not found in memtype cache, " "assuming host memory", address, length); diff --git a/src/uct/cuda/base/cuda_iface.c b/src/uct/cuda/base/cuda_iface.c index 4f788a2a27e..ba3c80d070f 100644 --- a/src/uct/cuda/base/cuda_iface.c +++ b/src/uct/cuda/base/cuda_iface.c @@ -211,10 +211,11 @@ uct_cuda_base_ep_flush(uct_ep_h tl_ep, unsigned flags, uct_completion_t *comp) goto error; } - flush_stream_desc->flush_desc = flush_desc; - flush_stream_desc->comp.func = uct_cuda_base_stream_flushed_cb; - flush_stream_desc->comp.count = 1; - flush_stream_desc->super.comp = &flush_stream_desc->comp; + flush_stream_desc->flush_desc = flush_desc; + flush_stream_desc->comp.func = uct_cuda_base_stream_flushed_cb; + flush_stream_desc->comp.count = 1; + flush_stream_desc->super.comp = &flush_stream_desc->comp; + flush_stream_desc->super.event = NULL; ucs_queue_push(&q_desc->event_queue, &flush_stream_desc->super.queue); flush_desc->stream_counter++; } @@ -243,8 +244,7 @@ uct_cuda_base_ep_flush(uct_ep_h tl_ep, unsigned flags, uct_completion_t *comp) static UCS_F_ALWAYS_INLINE int uct_cuda_base_event_is_flush(const uct_cuda_event_desc_t *event) { - return (event->comp != NULL) && - (event->comp->func == uct_cuda_base_stream_flushed_cb); + return event->event == NULL; } static UCS_F_ALWAYS_INLINE unsigned diff --git a/src/uct/cuda/cuda_copy/cuda_copy_ep.c b/src/uct/cuda/cuda_copy/cuda_copy_ep.c index e7545ef7b69..ad968c1bcca 100644 --- a/src/uct/cuda/cuda_copy/cuda_copy_ep.c +++ b/src/uct/cuda/cuda_copy/cuda_copy_ep.c @@ -339,6 +339,7 @@ uct_cuda_copy_post_cuda_async_copy(uct_ep_h tl_ep, void *dst, void *src, ucs_queue_push(&iface->super.active_queue, &q_desc->queue); } + VALGRIND_MAKE_MEM_DEFINED(&cuda_event->event, sizeof(cuda_event->event)); ucs_queue_push(event_q, &cuda_event->queue); cuda_event->comp = comp; diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_ep.c b/src/uct/cuda/cuda_ipc/cuda_ipc_ep.c index e8b62347d1e..16c85969dbc 100644 --- a/src/uct/cuda/cuda_ipc/cuda_ipc_ep.c +++ b/src/uct/cuda/cuda_ipc/cuda_ipc_ep.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -177,6 +178,8 @@ uct_cuda_ipc_post_cuda_async_copy(uct_ep_h tl_ep, uint64_t remote_addr, ucs_queue_push(&iface->super.active_queue, &q_desc->queue); } + VALGRIND_MAKE_MEM_DEFINED(&cuda_ipc_event->super.event, + sizeof(cuda_ipc_event->super.event)); ucs_queue_push(&q_desc->event_queue, &cuda_ipc_event->super.queue); cuda_ipc_event->super.comp = comp; cuda_ipc_event->mapped_addr = mapped_addr;