-
Notifications
You must be signed in to change notification settings - Fork 566
UCP/CUDA: Fix Valgrind failures with CUDA managed memory #11499
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -211,10 +211,11 @@ uct_cuda_base_ep_flush(uct_ep_h tl_ep, unsigned flags, uct_completion_t *comp) | |
| goto error; | ||
| } | ||
|
|
||
| flush_stream_desc->flush_desc = flush_desc; | ||
| flush_stream_desc->comp.func = uct_cuda_base_stream_flushed_cb; | ||
| flush_stream_desc->comp.count = 1; | ||
| flush_stream_desc->super.comp = &flush_stream_desc->comp; | ||
| flush_stream_desc->flush_desc = flush_desc; | ||
| flush_stream_desc->comp.func = uct_cuda_base_stream_flushed_cb; | ||
| flush_stream_desc->comp.count = 1; | ||
| flush_stream_desc->super.comp = &flush_stream_desc->comp; | ||
| flush_stream_desc->super.event = NULL; | ||
| ucs_queue_push(&q_desc->event_queue, &flush_stream_desc->super.queue); | ||
| flush_desc->stream_counter++; | ||
| } | ||
|
|
@@ -243,8 +244,7 @@ uct_cuda_base_ep_flush(uct_ep_h tl_ep, unsigned flags, uct_completion_t *comp) | |
| static UCS_F_ALWAYS_INLINE int | ||
| uct_cuda_base_event_is_flush(const uct_cuda_event_desc_t *event) | ||
| { | ||
| return (event->comp != NULL) && | ||
| (event->comp->func == uct_cuda_base_stream_flushed_cb); | ||
| return event->event == NULL; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is the previous check not working here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It dereferences |
||
| } | ||
|
|
||
| static UCS_F_ALWAYS_INLINE unsigned | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -339,6 +339,7 @@ uct_cuda_copy_post_cuda_async_copy(uct_ep_h tl_ep, void *dst, void *src, | |
| ucs_queue_push(&iface->super.active_queue, &q_desc->queue); | ||
| } | ||
|
|
||
| VALGRIND_MAKE_MEM_DEFINED(&cuda_event->event, sizeof(cuda_event->event)); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i guess we need to do it after ucs_mpool_get, because the reason the event is valid is because it's cuda_event->event is coming from the mpool, and it should be probably before used in line 333 |
||
| ucs_queue_push(event_q, &cuda_event->queue); | ||
| cuda_event->comp = comp; | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,7 @@ | |
| #include <uct/base/uct_log.h> | ||
| #include <uct/base/uct_iov.inl> | ||
| #include <ucs/debug/memtrack_int.h> | ||
| #include <ucs/sys/checker.h> | ||
| #include <ucs/sys/math.h> | ||
| #include <ucs/type/class.h> | ||
| #include <ucs/profile/profile.h> | ||
|
|
@@ -177,6 +178,8 @@ uct_cuda_ipc_post_cuda_async_copy(uct_ep_h tl_ep, uint64_t remote_addr, | |
| ucs_queue_push(&iface->super.active_queue, &q_desc->queue); | ||
| } | ||
|
|
||
| VALGRIND_MAKE_MEM_DEFINED(&cuda_ipc_event->super.event, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same here -- maybe need inline wrapper to get object (derived from) uct_cuda_event_desc_t from a memory pool and make ->event defined? |
||
| sizeof(cuda_ipc_event->super.event)); | ||
| ucs_queue_push(&q_desc->event_queue, &cuda_ipc_event->super.queue); | ||
| cuda_ipc_event->super.comp = comp; | ||
| cuda_ipc_event->mapped_addr = mapped_addr; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why needed with valgrind? is cuda/managed memory not present in memtype cache?