From 2c4311c3918b49d9e455f2fb04aea83a3374caa7 Mon Sep 17 00:00:00 2001 From: Srinivas Yadav Singanaboina Date: Tue, 18 Nov 2025 14:40:30 -0800 Subject: [PATCH 1/4] Add `cuda::execution::guarantee`'s API and `cuda::execution::segment_size::max_segment_size` --- .../include/cuda/__execution/guarantee.h | 75 ++++++++++++++++ .../cuda/__execution/max_segment_size.h | 85 +++++++++++++++++++ .../cuda/execution/guarantee.fail.cpp | 29 +++++++ .../cuda/execution/guarantee.pass.cpp | 26 ++++++ .../cuda/execution/max_segment_size.pass.cpp | 28 ++++++ 5 files changed, 243 insertions(+) create mode 100644 libcudacxx/include/cuda/__execution/guarantee.h create mode 100644 libcudacxx/include/cuda/__execution/max_segment_size.h create mode 100644 libcudacxx/test/libcudacxx/cuda/execution/guarantee.fail.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/execution/max_segment_size.pass.cpp diff --git a/libcudacxx/include/cuda/__execution/guarantee.h b/libcudacxx/include/cuda/__execution/guarantee.h new file mode 100644 index 00000000000..6a149a8d235 --- /dev/null +++ b/libcudacxx/include/cuda/__execution/guarantee.h @@ -0,0 +1,75 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef __CUDA___EXECUTION_GUARANTEE_H +#define __CUDA___EXECUTION_GUARANTEE_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include + +#include + +_CCCL_BEGIN_NAMESPACE_CUDA_EXECUTION + +class __guarantee +{}; + +struct __get_guarantees_t +{ + _CCCL_EXEC_CHECK_DISABLE + _CCCL_TEMPLATE(class _Env) + _CCCL_REQUIRES(::cuda::std::execution::__queryable_with<_Env, __get_guarantees_t>) + [[nodiscard]] _CCCL_NODEBUG_API constexpr auto operator()(const _Env& __env) const noexcept + { + static_assert(noexcept(__env.query(*this))); + return __env.query(*this); + } + + [[nodiscard]] + _CCCL_NODEBUG_API static constexpr auto query(::cuda::std::execution::forwarding_query_t) noexcept -> bool + { + return true; + } +}; + +_CCCL_GLOBAL_CONSTANT auto __get_guarantees = __get_guarantees_t{}; + +template +[[nodiscard]] _CCCL_NODEBUG_API auto guarantee(_Guarantees...) +{ + static_assert((::cuda::std::is_base_of_v<__guarantee, _Guarantees> && ...), + "Only guarantees can be passed to require"); + static_assert((::cuda::std::is_empty_v<_Guarantees> && ...), "Stateful guarantees are not implemented"); + + // clang < 19 doesn't like this code + // since the only guarantees we currently allow are in max_segment_size.h and + // all of them are stateless, let's ignore incoming parameters + ::cuda::std::execution::env<_Guarantees...> __env{}; + + return ::cuda::std::execution::prop{__get_guarantees_t{}, __env}; +} + +_CCCL_END_NAMESPACE_CUDA_EXECUTION + +#include + +#endif // __CUDA___EXECUTION_REQUIRE_H diff --git a/libcudacxx/include/cuda/__execution/max_segment_size.h b/libcudacxx/include/cuda/__execution/max_segment_size.h new file mode 100644 index 00000000000..cf7aa341b15 --- /dev/null +++ b/libcudacxx/include/cuda/__execution/max_segment_size.h @@ -0,0 +1,85 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef __CUDA___EXECUTION_MAX_SEG_SIZE_H +#define __CUDA___EXECUTION_MAX_SEG_SIZE_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include + +#include + +_CCCL_BEGIN_NAMESPACE_CUDA_EXECUTION + +namespace segment_size +{ +struct __get_max_segment_size_t; + +// TODO : add dynamic extent specialization later, when stateful guarantees (env) are supported + +//! A class template that can be used to specify the maximum segment size +//! for segmented algorithms. +//! \tparam _N The maximum segment size. +template +struct max_segment_size : __guarantee +{ + using value_type = size_t; + + constexpr max_segment_size() = default; + + _CCCL_API constexpr operator value_type() const noexcept + { + return _N; + } + + [[nodiscard]] _CCCL_NODEBUG_API constexpr auto query(const __get_max_segment_size_t&) const noexcept + { + return *this; + } +}; + +struct __get_max_segment_size_t +{ + _CCCL_EXEC_CHECK_DISABLE + _CCCL_TEMPLATE(class _Env) + _CCCL_REQUIRES(::cuda::std::execution::__queryable_with<_Env, __get_max_segment_size_t>) + [[nodiscard]] _CCCL_NODEBUG_API constexpr auto operator()(const _Env& __env) const noexcept + { + static_assert(noexcept(__env.query(*this))); + return __env.query(*this); + } + + [[nodiscard]] + _CCCL_NODEBUG_API static constexpr auto query(::cuda::std::execution::forwarding_query_t) noexcept -> bool + { + return true; + } +}; + +_CCCL_GLOBAL_CONSTANT auto __get_max_segment_size = __get_max_segment_size_t{}; +} // namespace segment_size + +_CCCL_END_NAMESPACE_CUDA_EXECUTION + +#include + +#endif // __CUDA___EXECUTION_MAX_SEG_SIZE_H diff --git a/libcudacxx/test/libcudacxx/cuda/execution/guarantee.fail.cpp b/libcudacxx/test/libcudacxx/cuda/execution/guarantee.fail.cpp new file mode 100644 index 00000000000..d0a9cc86f2d --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/execution/guarantee.fail.cpp @@ -0,0 +1,29 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include + +[[maybe_unused]] _CCCL_GLOBAL_CONSTANT struct query_t +{ +} query{}; + +__host__ __device__ void test() +{ + // not every environment is a requirement + cuda::std::execution::prop p{query, 42}; + cuda::execution::guarantee(p); +} + +int main(int, char**) +{ + test(); + + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp b/libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp new file mode 100644 index 00000000000..55722911a75 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include + +__host__ __device__ void test() +{ + static_assert( + cuda::std::is_same_v{})))), + cuda::execution::segment_size::max_segment_size<42>>); +} + +int main(int, char**) +{ + test(); + + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/execution/max_segment_size.pass.cpp b/libcudacxx/test/libcudacxx/cuda/execution/max_segment_size.pass.cpp new file mode 100644 index 00000000000..607f2d2e1f0 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/execution/max_segment_size.pass.cpp @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include + +__host__ __device__ void test() +{ + namespace exec = cuda::execution; + static_assert(cuda::std::is_base_of_v>); + + static_assert( + cuda::std::is_same_v{})), + exec::segment_size::max_segment_size<42>>); +} + +int main(int, char**) +{ + test(); + + return 0; +} From 5d2c817830f6766a56a6aa6dea893f53c7ac9ad7 Mon Sep 17 00:00:00 2001 From: Srinivas Yadav Singanaboina Date: Wed, 19 Nov 2025 11:30:15 -0800 Subject: [PATCH 2/4] Replace template parameter _N with _Size --- libcudacxx/include/cuda/__execution/max_segment_size.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libcudacxx/include/cuda/__execution/max_segment_size.h b/libcudacxx/include/cuda/__execution/max_segment_size.h index cf7aa341b15..a3cd5e15add 100644 --- a/libcudacxx/include/cuda/__execution/max_segment_size.h +++ b/libcudacxx/include/cuda/__execution/max_segment_size.h @@ -38,8 +38,8 @@ struct __get_max_segment_size_t; //! A class template that can be used to specify the maximum segment size //! for segmented algorithms. -//! \tparam _N The maximum segment size. -template +//! \tparam _Size The maximum segment size. +template struct max_segment_size : __guarantee { using value_type = size_t; @@ -48,7 +48,7 @@ struct max_segment_size : __guarantee _CCCL_API constexpr operator value_type() const noexcept { - return _N; + return _Size; } [[nodiscard]] _CCCL_NODEBUG_API constexpr auto query(const __get_max_segment_size_t&) const noexcept From 7aa7fed1fa7f6e5d6d92ffec74da3e89ca973baf Mon Sep 17 00:00:00 2001 From: Srinivas Yadav Singanaboina Date: Mon, 1 Dec 2025 13:56:31 -0800 Subject: [PATCH 3/4] remove usage of sub-namespace `segment_size` --- libcudacxx/include/cuda/__execution/max_segment_size.h | 3 --- .../test/libcudacxx/cuda/execution/guarantee.pass.cpp | 7 +++---- .../libcudacxx/cuda/execution/max_segment_size.pass.cpp | 7 +++---- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/libcudacxx/include/cuda/__execution/max_segment_size.h b/libcudacxx/include/cuda/__execution/max_segment_size.h index a3cd5e15add..5d6277b4641 100644 --- a/libcudacxx/include/cuda/__execution/max_segment_size.h +++ b/libcudacxx/include/cuda/__execution/max_segment_size.h @@ -30,8 +30,6 @@ _CCCL_BEGIN_NAMESPACE_CUDA_EXECUTION -namespace segment_size -{ struct __get_max_segment_size_t; // TODO : add dynamic extent specialization later, when stateful guarantees (env) are supported @@ -76,7 +74,6 @@ struct __get_max_segment_size_t }; _CCCL_GLOBAL_CONSTANT auto __get_max_segment_size = __get_max_segment_size_t{}; -} // namespace segment_size _CCCL_END_NAMESPACE_CUDA_EXECUTION diff --git a/libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp b/libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp index 55722911a75..fd7973d5c27 100644 --- a/libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp @@ -12,10 +12,9 @@ __host__ __device__ void test() { - static_assert( - cuda::std::is_same_v{})))), - cuda::execution::segment_size::max_segment_size<42>>); + static_assert(cuda::std::is_same_v{})))), + cuda::execution::max_segment_size<42>>); } int main(int, char**) diff --git a/libcudacxx/test/libcudacxx/cuda/execution/max_segment_size.pass.cpp b/libcudacxx/test/libcudacxx/cuda/execution/max_segment_size.pass.cpp index 607f2d2e1f0..f122556369a 100644 --- a/libcudacxx/test/libcudacxx/cuda/execution/max_segment_size.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/execution/max_segment_size.pass.cpp @@ -13,11 +13,10 @@ __host__ __device__ void test() { namespace exec = cuda::execution; - static_assert(cuda::std::is_base_of_v>); + static_assert(cuda::std::is_base_of_v>); - static_assert( - cuda::std::is_same_v{})), - exec::segment_size::max_segment_size<42>>); + static_assert(cuda::std::is_same_v{})), + exec::max_segment_size<42>>); } int main(int, char**) From b2b7938825a4e78bb3bbc5192c9c3eeb0abc1558 Mon Sep 17 00:00:00 2001 From: Srinivas Yadav Singanaboina Date: Tue, 9 Dec 2025 10:25:35 -0800 Subject: [PATCH 4/4] Extend Gurantee's API and max_segment_size to support stateful/dynamic guarantees --- .../include/cuda/__execution/guarantee.h | 11 ++---- .../cuda/__execution/max_segment_size.h | 34 +++++++++++++++-- .../cuda/execution/guarantee.pass.cpp | 25 ++++++++++--- .../cuda/execution/max_segment_size.pass.cpp | 37 +++++++++++++++++-- 4 files changed, 88 insertions(+), 19 deletions(-) diff --git a/libcudacxx/include/cuda/__execution/guarantee.h b/libcudacxx/include/cuda/__execution/guarantee.h index 6a149a8d235..c303ad65a08 100644 --- a/libcudacxx/include/cuda/__execution/guarantee.h +++ b/libcudacxx/include/cuda/__execution/guarantee.h @@ -54,16 +54,13 @@ struct __get_guarantees_t _CCCL_GLOBAL_CONSTANT auto __get_guarantees = __get_guarantees_t{}; template -[[nodiscard]] _CCCL_NODEBUG_API auto guarantee(_Guarantees...) +[[nodiscard]] _CCCL_NODEBUG_API constexpr auto guarantee(_Guarantees... __guarantees_args) noexcept { static_assert((::cuda::std::is_base_of_v<__guarantee, _Guarantees> && ...), - "Only guarantees can be passed to require"); - static_assert((::cuda::std::is_empty_v<_Guarantees> && ...), "Stateful guarantees are not implemented"); + "Only guarantees can be passed to guarantee"); + // static_assert((::cuda::std::is_empty_v<_Guarantees> && ...), "Stateful guarantees are not implemented"); - // clang < 19 doesn't like this code - // since the only guarantees we currently allow are in max_segment_size.h and - // all of them are stateless, let's ignore incoming parameters - ::cuda::std::execution::env<_Guarantees...> __env{}; + ::cuda::std::execution::env<_Guarantees...> __env{__guarantees_args...}; return ::cuda::std::execution::prop{__get_guarantees_t{}, __env}; } diff --git a/libcudacxx/include/cuda/__execution/max_segment_size.h b/libcudacxx/include/cuda/__execution/max_segment_size.h index 5d6277b4641..4b5f551a893 100644 --- a/libcudacxx/include/cuda/__execution/max_segment_size.h +++ b/libcudacxx/include/cuda/__execution/max_segment_size.h @@ -32,18 +32,21 @@ _CCCL_BEGIN_NAMESPACE_CUDA_EXECUTION struct __get_max_segment_size_t; -// TODO : add dynamic extent specialization later, when stateful guarantees (env) are supported +_CCCL_GLOBAL_CONSTANT auto dynamic_max_segment_size = static_cast(-1); //! A class template that can be used to specify the maximum segment size //! for segmented algorithms. //! \tparam _Size The maximum segment size. -template +template struct max_segment_size : __guarantee { - using value_type = size_t; + using value_type = size_t; + static constexpr size_t size = _Size; constexpr max_segment_size() = default; + _CCCL_API constexpr max_segment_size(size_t) noexcept {} + _CCCL_API constexpr operator value_type() const noexcept { return _Size; @@ -55,6 +58,31 @@ struct max_segment_size : __guarantee } }; +template <> +struct max_segment_size : __guarantee +{ + using value_type = size_t; + + static constexpr size_t size = dynamic_max_segment_size; + + _CCCL_API constexpr max_segment_size(size_t __s) + : __val(__s) + {} + + _CCCL_API constexpr operator value_type() const noexcept + { + return __val; + } + + [[nodiscard]] _CCCL_NODEBUG_API constexpr auto query(const __get_max_segment_size_t&) const noexcept + { + return *this; + } + +private: + size_t __val; +}; + struct __get_max_segment_size_t { _CCCL_EXEC_CHECK_DISABLE diff --git a/libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp b/libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp index fd7973d5c27..3090fda8cc0 100644 --- a/libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp @@ -8,18 +8,31 @@ // //===----------------------------------------------------------------------===// +#include #include +#include -__host__ __device__ void test() +__host__ __device__ void test(size_t dynamic_val) { - static_assert(cuda::std::is_same_v{})))), - cuda::execution::max_segment_size<42>>); + // Test for state-less guarantee + auto static_genv = cuda::execution::guarantee(cuda::execution::max_segment_size<42>{}); + auto static_env = ::cuda::std::execution::env{static_genv}; + auto static_genv_extracted = cuda::execution::__get_guarantees(static_env); + (void) static_genv_extracted; + + // Test for stateful guarantee + auto dynamic_genv = cuda::execution::guarantee(cuda::execution::max_segment_size<>{dynamic_val}); + auto dynamic_env = ::cuda::std::execution::env{dynamic_genv}; + auto dynamic_genv_extracted = cuda::execution::__get_guarantees(dynamic_env); + (void) dynamic_genv_extracted; + + // Test that max_segment_size is a guarantee + static_assert(cuda::std::is_base_of_v>); } -int main(int, char**) +int main(int argc, char**) { - test(); + test(argc); return 0; } diff --git a/libcudacxx/test/libcudacxx/cuda/execution/max_segment_size.pass.cpp b/libcudacxx/test/libcudacxx/cuda/execution/max_segment_size.pass.cpp index f122556369a..c93bfd32e85 100644 --- a/libcudacxx/test/libcudacxx/cuda/execution/max_segment_size.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/execution/max_segment_size.pass.cpp @@ -8,20 +8,51 @@ // //===----------------------------------------------------------------------===// +#include #include +#include -__host__ __device__ void test() +__host__ __device__ void test(size_t dynamic_val) { namespace exec = cuda::execution; static_assert(cuda::std::is_base_of_v>); static_assert(cuda::std::is_same_v{})), exec::max_segment_size<42>>); + + static_assert(cuda::std::is_same_v{dynamic_val})), + exec::max_segment_size>); + + static_assert( + cuda::std::is_same_v< + decltype(exec::__get_max_segment_size(exec::__get_guarantees(exec::guarantee(exec::max_segment_size<42>{})))), + exec::max_segment_size<42>>); + + static_assert(cuda::std::is_same_v{dynamic_val})))), + exec::max_segment_size>); + + constexpr exec::max_segment_size<42> static_size{}; + static_assert(static_cast(static_size) == 42); + + constexpr exec::max_segment_size<42> static_size_with_runtime_value{100}; + // ignore runtime value in case of static size + static_assert(static_cast(static_size_with_runtime_value) == 42); + + exec::max_segment_size dynamic_size{dynamic_val}; + (void) (static_cast(dynamic_size) == dynamic_val); + + auto g_env = exec::guarantee(dynamic_size); + (void) g_env; + + auto dynamic_size_extracted = + ::cuda::std::execution::__query_or(g_env, exec::__get_max_segment_size_t{}, exec::max_segment_size<0>{}); + (void) (static_cast(dynamic_size_extracted) == dynamic_val); } -int main(int, char**) +int main(int argc, char**) { - test(); + test(argc); return 0; }