Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 58 additions & 48 deletions src/ucp/core/ucp_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -1284,50 +1284,64 @@ static int ucp_tl_resource_is_same_device(const uct_tl_resource_desc_t *resource
(resource1->sys_device == resource2->sys_device));
}

static void ucp_add_tl_resource_if_enabled(
ucp_context_h context, ucp_md_index_t md_index,
const ucp_config_t *config, const ucs_string_set_t *aux_tls,
const uct_tl_resource_desc_t *resource, unsigned *num_resources_p,
uint64_t dev_cfg_masks[], uint64_t *tl_cfg_mask)
/*
 * Append a transport resource to context->tl_rscs[] if the configuration
 * enables it.
 *
 * NOTE(review): this listing shows both the removed and the added lines of
 * the change, so some statements appear twice; the comments below describe
 * the status-returning version.
 *
 * context         - UCP context whose tl_rscs[] / num_tls are updated.
 * md_index        - index into context->tl_mds[] of the owning memory domain;
 *                   stored in the new entry and used to update sys_dev_map.
 * config          - passed through to ucp_is_resource_enabled().
 * aux_tls         - passed through to ucp_is_resource_enabled().
 * resource        - transport resource descriptor to consider; copied by
 *                   value into the new entry.
 * num_resources_p - incremented by one when the resource is added.
 * dev_cfg_masks   - passed through to ucp_is_resource_enabled().
 * tl_cfg_mask     - passed through to ucp_is_resource_enabled().
 *
 * Returns UCS_OK when the resource is either added or not enabled, and
 * UCS_ERR_EXCEEDS_LIMIT when context->num_tls has already reached
 * UCP_MAX_RESOURCES.
 */
static ucs_status_t
ucp_add_tl_resource_if_enabled(ucp_context_h context, ucp_md_index_t md_index,
const ucp_config_t *config,
const ucs_string_set_t *aux_tls,
const uct_tl_resource_desc_t *resource,
unsigned *num_resources_p,
uint64_t dev_cfg_masks[], uint64_t *tl_cfg_mask)
{
ucp_tl_md_t *md = &context->tl_mds[md_index];
uint8_t rsc_flags;
ucp_rsc_index_t dev_index, i;

if (ucp_is_resource_enabled(resource, config, aux_tls, &rsc_flags,
dev_cfg_masks, tl_cfg_mask)) {
if ((resource->sys_device != UCS_SYS_DEVICE_ID_UNKNOWN) &&
(resource->sys_device >= UCP_MAX_SYS_DEVICES)) {
ucs_diag(UCT_TL_RESOURCE_DESC_FMT
" system device is %d, which exceeds the maximal "
"supported (%d), system locality may be ignored",
UCT_TL_RESOURCE_DESC_ARG(resource), resource->sys_device,
UCP_MAX_SYS_DEVICES);
}
context->tl_rscs[context->num_tls].tl_rsc = *resource;
context->tl_rscs[context->num_tls].md_index = md_index;
context->tl_rscs[context->num_tls].tl_name_csum =
ucs_crc16_string(resource->tl_name);
context->tl_rscs[context->num_tls].flags = rsc_flags;

dev_index = 0;
for (i = 0; i < context->num_tls; ++i) {
if (ucp_tl_resource_is_same_device(&context->tl_rscs[i].tl_rsc, resource)) {
dev_index = context->tl_rscs[i].dev_index;
break;
} else {
dev_index = ucs_max(context->tl_rscs[i].dev_index + 1, dev_index);
}
}
context->tl_rscs[context->num_tls].dev_index = dev_index;
/* A resource that is not enabled is skipped; this is not an error */
if (!ucp_is_resource_enabled(resource, config, aux_tls, &rsc_flags,
dev_cfg_masks, tl_cfg_mask)) {
return UCS_OK;
}

if (resource->sys_device < UCP_MAX_SYS_DEVICES) {
md->sys_dev_map |= UCS_BIT(resource->sys_device);
/* Reject the resource before tl_rscs[num_tls] is written.
 * NOTE(review): presumably tl_rscs[] capacity is bounded by
 * UCP_MAX_RESOURCES - confirm against the allocation site. */
if (context->num_tls >= UCP_MAX_RESOURCES) {
ucs_error("exceeded transports/devices limit (up to %d are supported)",
UCP_MAX_RESOURCES);
return UCS_ERR_EXCEEDS_LIMIT;
}

/* Diagnostic only: a known system device id that is out of range is
 * reported, and the resource is still added below */
if ((resource->sys_device != UCS_SYS_DEVICE_ID_UNKNOWN) &&
(resource->sys_device >= UCP_MAX_SYS_DEVICES)) {
ucs_diag(UCT_TL_RESOURCE_DESC_FMT
" system device is %d, which exceeds the maximal "
"supported (%d), system locality may be ignored",
UCT_TL_RESOURCE_DESC_ARG(resource), resource->sys_device,
UCP_MAX_SYS_DEVICES);
}

/* Fill the next free entry; num_tls is advanced only at the end */
context->tl_rscs[context->num_tls].tl_rsc = *resource;
context->tl_rscs[context->num_tls].md_index = md_index;
context->tl_rscs[context->num_tls].tl_name_csum = ucs_crc16_string(
resource->tl_name);
context->tl_rscs[context->num_tls].flags = rsc_flags;

/* Pick the device index: reuse the index of an already-added resource on
 * the same device, otherwise use one past the largest index seen so far */
dev_index = 0;
for (i = 0; i < context->num_tls; ++i) {
if (ucp_tl_resource_is_same_device(&context->tl_rscs[i].tl_rsc,
resource)) {
dev_index = context->tl_rscs[i].dev_index;
break;
} else {
dev_index = ucs_max(context->tl_rscs[i].dev_index + 1, dev_index);
}
}
context->tl_rscs[context->num_tls].dev_index = dev_index;

++context->num_tls;
++(*num_resources_p);
/* Record the system device in the memory domain's map only when its id is
 * below UCP_MAX_SYS_DEVICES */
if (resource->sys_device < UCP_MAX_SYS_DEVICES) {
md->sys_dev_map |= UCS_BIT(resource->sys_device);
}

/* Commit the new entry and report it to the caller's counter */
++context->num_tls;
++(*num_resources_p);

return UCS_OK;
}

static ucs_status_t
Expand All @@ -1343,7 +1357,7 @@ ucp_add_tl_resources(ucp_context_h context, ucp_md_index_t md_index,
ucp_tl_resource_desc_t *tmp;
unsigned num_tl_resources;
ucs_status_t status;
ucp_rsc_index_t i;
unsigned i;

*num_resources_p = 0;

Expand Down Expand Up @@ -1381,9 +1395,13 @@ ucp_add_tl_resources(ucp_context_h context, ucp_md_index_t md_index,
"'%s'(%s)", tl_resources[i].dev_name,
context->tl_cmpts[md->cmpt_index].attr.name);
ucs_string_set_add(avail_tls, tl_resources[i].tl_name);
ucp_add_tl_resource_if_enabled(context, md_index, config, aux_tls,
&tl_resources[i], num_resources_p,
dev_cfg_masks, tl_cfg_mask);
status = ucp_add_tl_resource_if_enabled(context, md_index, config,
aux_tls, &tl_resources[i],
num_resources_p, dev_cfg_masks,
tl_cfg_mask);
if (status != UCS_OK) {
goto free_resources;
}
}

status = UCS_OK;
Expand Down Expand Up @@ -1696,14 +1714,6 @@ static ucs_status_t ucp_check_resources(ucp_context_h context,
return UCS_ERR_NO_DEVICE;
}

/* Error check: Make sure there are not too many transports */
if (context->num_tls >= UCP_MAX_RESOURCES) {
ucs_error("exceeded transports/devices limit "
"(%u requested, up to %d are supported)",
context->num_tls, UCP_MAX_RESOURCES);
return UCS_ERR_EXCEEDS_LIMIT;
}

return ucp_check_tl_names(context);
}

Expand All @@ -1720,11 +1730,11 @@ ucp_add_component_resources(ucp_context_h context, ucp_rsc_index_t cmpt_index,
uct_component_attr_t uct_component_attr;
unsigned num_tl_resources;
ucs_status_t status;
ucp_rsc_index_t i;
const uct_md_attr_v2_t *md_attr;
unsigned md_index;
uint64_t detect_mem_type_mask;
uint64_t alloc_mem_type_mask;
unsigned i;

/* List memory domain resources */
uct_component_attr.field_mask = UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES |
Expand Down