Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ public:
}

cudaGraphNode_t c;
cuda_safe_call(cudaGraphAddChildGraphNode(&c, graph, nullptr, 0, *inner_graph));
cuda_try(cudaGraphAddChildGraphNode(&c, graph, nullptr, 0, *inner_graph));
}

/* This simply executes the algorithm within the existing context. This
Expand Down Expand Up @@ -285,7 +285,7 @@ public:
cached_exec_graphs[stream].push_back(eg);
}

cuda_safe_call(cudaGraphLaunch(*eg, stream));
cuda_try(cudaGraphLaunch(*eg, stream));
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated

// Free resources allocated through the adapter
adapter.clear();
Expand Down Expand Up @@ -339,7 +339,7 @@ public:
cached_exec_graphs[stream].push_back(eg);
}

cuda_safe_call(cudaGraphLaunch(*eg, stream));
cuda_try(cudaGraphLaunch(*eg, stream));

// Free resources allocated through the adapter
adapter.clear();
Expand Down
3 changes: 1 addition & 2 deletions cudax/include/cuda/experimental/__stf/internal/dot.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -560,8 +560,7 @@ public:
{
if (getenv("CUDASTF_DOT_COLOR_BY_DEVICE"))
{
int dev;
cuda_safe_call(cudaGetDevice(&dev));
const int dev = cuda_try<cudaGetDevice>();
EXPECT(dev < sizeof(colors) / sizeof(*colors));
current_color = colors[dev];
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,7 @@ public:
cache_size_limit = atol(str) * 1024 * 1024;
}

int ndevices;
cuda_safe_call(cudaGetDeviceCount(&ndevices));
const int ndevices = cuda_try<cudaGetDeviceCount>();

// One individual cache per device (TODO per execution place at some point
// if we consider green contexts or multi-gpu graphs ?)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ public:

size_t sz = sizeof(T);

cuda_safe_call(cudaMemcpyAsync((void*) dst_instance.addr, (void*) src_instance.addr, sz, kind, stream));
cuda_try(cudaMemcpyAsync((void*) dst_instance.addr, (void*) src_instance.addr, sz, kind, stream));
}

void data_allocate(
Expand Down Expand Up @@ -267,9 +267,7 @@ public:
.extent = make_cudaExtent(sizeof(T), 1, 1),
.kind = kind};

cudaGraphNode_t result;
cuda_safe_call(cudaGraphAddMemcpyNode(&result, graph, input_nodes, input_cnt, &cpy_params));
return result;
return cuda_try<cudaGraphAddMemcpyNode>(graph, input_nodes, input_cnt, &cpy_params);
}

bool pin_host_memory(instance_id_t instance_id) override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class scheduler
public:
scheduler()
{
cuda_safe_call(cudaGetDeviceCount(&num_devices));
num_devices = cuda_try<cudaGetDeviceCount>();
assert(num_devices > 0);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ UNITTEST("thread hierarchy indexing")
auto config = p.get_config();
reserved::unit_test_thread_hierarchy<<<config[1], config[2]>>>(h);

cuda_safe_call(cudaDeviceSynchronize());
cuda_try(cudaDeviceSynchronize());
};

namespace reserved
Expand Down Expand Up @@ -473,15 +473,15 @@ UNITTEST("thread hierarchy sync")
auto config = p.get_config();

void* args[] = {&h};
cuda_safe_call(cudaLaunchCooperativeKernel(
cuda_try(cudaLaunchCooperativeKernel(
(void*) reserved::unit_test_thread_hierarchy_sync<true, size_t(0), true, size_t(1)>,
config[1],
config[2],
args,
0,
0));

cuda_safe_call(cudaDeviceSynchronize());
cuda_try(cudaDeviceSynchronize());
};

namespace reserved
Expand Down Expand Up @@ -511,7 +511,7 @@ UNITTEST("thread hierarchy inner sync")
auto config = p.get_config();
reserved::unit_test_thread_hierarchy_inner_sync<false, size_t(0), true, size_t(0)><<<config[1], config[2]>>>(h);

cuda_safe_call(cudaDeviceSynchronize());
cuda_try(cudaDeviceSynchronize());
};

# endif // !defined(CUDASTF_DISABLE_CODE_GENERATION) && _CCCL_CUDA_COMPILATION()
Expand Down
Loading