Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/ucp/core/ucp_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <ucs/memory/memory_type.h>
#include <ucs/memory/rcache.h>
#include <ucs/type/spinlock.h>
#include <ucs/sys/checker.h>
#include <ucs/sys/string.h>
#include <ucs/type/param.h>

Expand Down Expand Up @@ -717,6 +718,13 @@ ucp_memory_detect_internal(ucp_context_h context, const void *address,

status = ucs_memtype_cache_lookup(address, length, mem_info);
if (ucs_likely(status == UCS_ERR_NO_ELEM)) {
if (ucs_unlikely(RUNNING_ON_VALGRIND)) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this needed with Valgrind? Is CUDA/managed memory not present in the memtype cache?

ucs_trace_req("address %p length %zu: not found in memtype cache, "
"detecting memory type under Valgrind", address, length);
ucp_memory_detect_slowpath(context, address, length, mem_info);
return;
}

ucs_trace_req("address %p length %zu: not found in memtype cache, "
"assuming host memory",
address, length);
Expand Down
12 changes: 6 additions & 6 deletions src/uct/cuda/base/cuda_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,10 +211,11 @@ uct_cuda_base_ep_flush(uct_ep_h tl_ep, unsigned flags, uct_completion_t *comp)
goto error;
}

flush_stream_desc->flush_desc = flush_desc;
flush_stream_desc->comp.func = uct_cuda_base_stream_flushed_cb;
flush_stream_desc->comp.count = 1;
flush_stream_desc->super.comp = &flush_stream_desc->comp;
flush_stream_desc->flush_desc = flush_desc;
flush_stream_desc->comp.func = uct_cuda_base_stream_flushed_cb;
flush_stream_desc->comp.count = 1;
flush_stream_desc->super.comp = &flush_stream_desc->comp;
flush_stream_desc->super.event = NULL;
ucs_queue_push(&q_desc->event_queue, &flush_stream_desc->super.queue);
flush_desc->stream_counter++;
}
Expand Down Expand Up @@ -243,8 +244,7 @@ uct_cuda_base_ep_flush(uct_ep_h tl_ep, unsigned flags, uct_completion_t *comp)
static UCS_F_ALWAYS_INLINE int
uct_cuda_base_event_is_flush(const uct_cuda_event_desc_t *event)
{
return (event->comp != NULL) &&
(event->comp->func == uct_cuda_base_stream_flushed_cb);
return event->event == NULL;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is the previous check not working here?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It dereferences comp to read comp->func, and comp can point into a UCP request. Valgrind may mark that request memory as undefined/noaccess.

}

static UCS_F_ALWAYS_INLINE unsigned
Expand Down
1 change: 1 addition & 0 deletions src/uct/cuda/cuda_copy/cuda_copy_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@ uct_cuda_copy_post_cuda_async_copy(uct_ep_h tl_ep, void *dst, void *src,
ucs_queue_push(&iface->super.active_queue, &q_desc->queue);
}

VALGRIND_MAKE_MEM_DEFINED(&cuda_event->event, sizeof(cuda_event->event));
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we need to do this right after ucs_mpool_get, because the reason the event is valid is that cuda_event->event comes from the mpool; it should probably be done before the event is used on line 333.

ucs_queue_push(event_q, &cuda_event->queue);
cuda_event->comp = comp;

Expand Down
3 changes: 3 additions & 0 deletions src/uct/cuda/cuda_ipc/cuda_ipc_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <uct/base/uct_log.h>
#include <uct/base/uct_iov.inl>
#include <ucs/debug/memtrack_int.h>
#include <ucs/sys/checker.h>
#include <ucs/sys/math.h>
#include <ucs/type/class.h>
#include <ucs/profile/profile.h>
Expand Down Expand Up @@ -177,6 +178,8 @@ uct_cuda_ipc_post_cuda_async_copy(uct_ep_h tl_ep, uint64_t remote_addr,
ucs_queue_push(&iface->super.active_queue, &q_desc->queue);
}

VALGRIND_MAKE_MEM_DEFINED(&cuda_ipc_event->super.event,
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here -- maybe we need an inline wrapper that gets an object (derived from) uct_cuda_event_desc_t from a memory pool and marks ->event as defined?

sizeof(cuda_ipc_event->super.event));
ucs_queue_push(&q_desc->event_queue, &cuda_ipc_event->super.queue);
cuda_ipc_event->super.comp = comp;
cuda_ipc_event->mapped_addr = mapped_addr;
Expand Down
Loading