diff --git a/libcudacxx/include/cuda/__execution/guarantee.h b/libcudacxx/include/cuda/__execution/guarantee.h new file mode 100644 index 00000000000..03f99da8757 --- /dev/null +++ b/libcudacxx/include/cuda/__execution/guarantee.h @@ -0,0 +1,82 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef __CUDA___EXECUTION_GUARANTEE_H +#define __CUDA___EXECUTION_GUARANTEE_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +#include + +_CCCL_BEGIN_NAMESPACE_CUDA_EXECUTION + +//! @brief Base class of all guarantees that can be passed to @c cuda::execution::guarantee. +//! +//! A guarantee is a promise that the caller makes to an algorithm about its input or the problem being solved (e.g. an +//! upper bound on the total number of items). Algorithms may exploit guarantees to select faster code paths or smaller +//! intermediate types. This is the dual of @c cuda::execution::__requirement, which describes a property that the +//! caller demands from the algorithm. Unlike requirements, guarantees may be stateful, i.e. they may carry a runtime +//! value. +class __guarantee +{}; + +struct __get_guarantees_t +{ + _CCCL_EXEC_CHECK_DISABLE + _CCCL_TEMPLATE(class _Env) + _CCCL_REQUIRES(::cuda::std::execution::__queryable_with<_Env, __get_guarantees_t>) + [[nodiscard]] _CCCL_NODEBUG_API constexpr auto operator()(const _Env& __env) const noexcept + { + static_assert(noexcept(__env.query(*this))); + return __env.query(*this); + } + + [[nodiscard]] + _CCCL_NODEBUG_API static constexpr auto query(::cuda::std::execution::forwarding_query_t) noexcept -> bool + { + return true; + } +}; + +_CCCL_GLOBAL_CONSTANT auto __get_guarantees = __get_guarantees_t{}; + +//! @brief Bundles a pack of guarantees into an environment that can be passed to device-wide parallel algorithms. +//! +//! The returned property is keyed by @c __get_guarantees_t so that individual guarantees are only visible to algorithms +//! through the guarantees environment, mirroring how @c cuda::execution::require exposes requirements. Each guarantee +//! is stored by value, preserving any runtime state it carries. +template +[[nodiscard]] _CCCL_NODEBUG_API auto guarantee(_Guarantees... __guarantees) +{ + static_assert((::cuda::std::is_base_of_v<__guarantee, _Guarantees> && ...), + "Only guarantees can be passed to guarantee"); + + ::cuda::std::execution::env<_Guarantees...> __env{__guarantees...}; + + return ::cuda::std::execution::prop{__get_guarantees_t{}, __env}; +} + +_CCCL_END_NAMESPACE_CUDA_EXECUTION + +#include + +#endif // __CUDA___EXECUTION_GUARANTEE_H diff --git a/libcudacxx/include/cuda/__execution/max_total_num_items.h b/libcudacxx/include/cuda/__execution/max_total_num_items.h new file mode 100644 index 00000000000..23df63a540c --- /dev/null +++ b/libcudacxx/include/cuda/__execution/max_total_num_items.h @@ -0,0 +1,140 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef __CUDA___EXECUTION_MAX_TOTAL_NUM_ITEMS_H +#define __CUDA___EXECUTION_MAX_TOTAL_NUM_ITEMS_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include +#include +#include + +#include + +_CCCL_BEGIN_NAMESPACE_CUDA_EXECUTION + +//! @brief Guarantee describing an upper bound on the total number of items processed by an algorithm (e.g. the combined +//! size of all segments handled by cub::DeviceBatchedTopK). +//! +//! The bound is carried as an integral value whose type is inferred from the argument; that type distinguishes, for +//! example, a 32-bit from a 64-bit bound and lets algorithms size intermediate offset types accordingly. The bound can +//! be expressed as a compile-time bound (@c static_highest), a runtime bound (@c highest()), or both. A composable +//! @c min_total_num_items lower-bound guarantee may be added in the future. +struct __get_max_total_num_items_t; + +template +struct _CCCL_DECLSPEC_EMPTY_BASES __max_total_num_items_holder_t : __guarantee +{ + static_assert(::cuda::std::is_integral_v<_Tp>, "max_total_num_items requires an integral bound type"); + + using element_type = _Tp; + + static constexpr element_type static_highest = _StaticHighest; + + element_type __highest_; + + //! @brief Returns the effective (runtime) upper bound on the total number of items. + [[nodiscard]] _CCCL_NODEBUG_API constexpr auto highest() const noexcept -> element_type + { + return __highest_; + } + + [[nodiscard]] _CCCL_NODEBUG_API constexpr auto query(const __get_max_total_num_items_t&) const noexcept + -> const __max_total_num_items_holder_t& + { + return *this; + } +}; + +struct __get_max_total_num_items_t +{ + _CCCL_EXEC_CHECK_DISABLE + _CCCL_TEMPLATE(class _Env) + _CCCL_REQUIRES(::cuda::std::execution::__queryable_with<_Env, __get_max_total_num_items_t>) + [[nodiscard]] _CCCL_NODEBUG_API constexpr auto operator()(const _Env& __env) const noexcept + { + static_assert(noexcept(__env.query(*this))); + return __env.query(*this); + } + + [[nodiscard]] + _CCCL_NODEBUG_API static constexpr auto query(::cuda::std::execution::forwarding_query_t) noexcept -> bool + { + return true; + } +}; + +_CCCL_GLOBAL_CONSTANT auto __get_max_total_num_items = __get_max_total_num_items_t{}; + +//! @brief Creates a guarantee with a compile-time upper bound on the total number of items. +//! +//! The bound type is inferred from the non-type template parameter, which must be integral. +//! +//! @tparam _Highest Compile-time upper bound on the total number of items. +//! @return A guarantee that can be passed to @c cuda::execution::guarantee. +template +[[nodiscard]] _CCCL_NODEBUG_API constexpr auto max_total_num_items() noexcept + -> __max_total_num_items_holder_t +{ + static_assert(::cuda::std::is_integral_v, "max_total_num_items requires an integral bound"); + return __max_total_num_items_holder_t{{}, _Highest}; +} + +//! @brief Creates a guarantee with a runtime upper bound on the total number of items. +//! +//! The bound type is inferred from the argument, which must be integral. The compile-time bound spans the whole type. +//! +//! @param __highest Runtime upper bound on the total number of items. +//! @return A guarantee that can be passed to @c cuda::execution::guarantee. +_CCCL_TEMPLATE(class _Tp) +_CCCL_REQUIRES(::cuda::std::is_integral_v<_Tp>) +[[nodiscard]] _CCCL_NODEBUG_API constexpr auto max_total_num_items(_Tp __highest) noexcept + -> __max_total_num_items_holder_t<_Tp, (::cuda::std::numeric_limits<_Tp>::max)()> +{ + return __max_total_num_items_holder_t<_Tp, (::cuda::std::numeric_limits<_Tp>::max)()>{{}, __highest}; +} + +//! @brief Creates a guarantee with both a compile-time and a runtime upper bound on the total number of items. +//! +//! The bound type is inferred from the non-type template parameter. The runtime bound must not exceed the compile-time +//! bound. +//! +//! @tparam _Highest Compile-time upper bound on the total number of items. +//! @param __highest Runtime upper bound on the total number of items, must be `<= _Highest`. +//! @return A guarantee that can be passed to @c cuda::execution::guarantee. +template +[[nodiscard]] _CCCL_NODEBUG_API constexpr auto max_total_num_items(_Tp __highest) noexcept + -> __max_total_num_items_holder_t +{ + static_assert(::cuda::std::is_integral_v, + "max_total_num_items requires an integral static bound"); + static_assert(::cuda::std::is_integral_v<_Tp>, "max_total_num_items requires an integral runtime bound"); + _CCCL_ASSERT(::cuda::std::cmp_less_equal(__highest, _Highest), + "max_total_num_items: the runtime bound must not exceed the static bound"); + return __max_total_num_items_holder_t{{}, static_cast(__highest)}; +} + +_CCCL_END_NAMESPACE_CUDA_EXECUTION + +#include + +#endif // __CUDA___EXECUTION_MAX_TOTAL_NUM_ITEMS_H diff --git a/libcudacxx/include/cuda/execution b/libcudacxx/include/cuda/execution index dfb698bcb4b..c9fafee22ea 100644 --- a/libcudacxx/include/cuda/execution +++ b/libcudacxx/include/cuda/execution @@ -22,6 +22,8 @@ #endif // no system header #include +#include +#include #include #include #include diff --git a/libcudacxx/include/cuda/execution.guarantee.h b/libcudacxx/include/cuda/execution.guarantee.h new file mode 100644 index 00000000000..3f891675c34 --- /dev/null +++ b/libcudacxx/include/cuda/execution.guarantee.h @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA_EXECUTION_EXECUTION_GUARANTEE_H +#define _CUDA_EXECUTION_EXECUTION_GUARANTEE_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include + +#endif // _CUDA_EXECUTION_EXECUTION_GUARANTEE_H diff --git a/libcudacxx/include/cuda/execution.max_total_num_items.h b/libcudacxx/include/cuda/execution.max_total_num_items.h new file mode 100644 index 00000000000..8e0570d4cfd --- /dev/null +++ b/libcudacxx/include/cuda/execution.max_total_num_items.h @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA_EXECUTION_EXECUTION_MAX_TOTAL_NUM_ITEMS_H +#define _CUDA_EXECUTION_EXECUTION_MAX_TOTAL_NUM_ITEMS_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include + +#endif // _CUDA_EXECUTION_EXECUTION_MAX_TOTAL_NUM_ITEMS_H diff --git a/libcudacxx/test/libcudacxx/cuda/execution/guarantee.fail.cpp b/libcudacxx/test/libcudacxx/cuda/execution/guarantee.fail.cpp new file mode 100644 index 00000000000..a6174f5eb4a --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/execution/guarantee.fail.cpp @@ -0,0 +1,31 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include + +#include "test_macros.h" + +[[maybe_unused]] _CCCL_GLOBAL_CONSTANT struct query_t +{ +} query{}; + +TEST_FUNC void test() +{ + // not every environment is a guarantee + cuda::std::execution::prop p{query, 42}; + cuda::execution::guarantee(p); +} + +int main(int, char**) +{ + test(); + + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp b/libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp new file mode 100644 index 00000000000..0d67e6bec4d --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp @@ -0,0 +1,39 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +#include "test_macros.h" + +TEST_FUNC void test() +{ + namespace exec = cuda::execution; + + // A guarantee is only visible to an algorithm through the guarantees environment produced by guarantee(...), + // mirroring how requirements are only visible through the requirements environment produced by require(...). + const auto genv = exec::guarantee(exec::max_total_num_items<1000>()); + const auto resolved = exec::__get_max_total_num_items(exec::__get_guarantees(genv)); + static_assert(cuda::std::is_base_of_v>); + assert(resolved.highest() == 1000); + + // The guarantees query is a forwarding query, just like the requirements query. + static_assert(cuda::std::execution::forwarding_query(exec::__get_guarantees_t{})); +} + +int main(int, char**) +{ + test(); + + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.fail.cpp b/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.fail.cpp new file mode 100644 index 00000000000..baffa2de37b --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.fail.cpp @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include + +#include "test_macros.h" + +TEST_FUNC void test() +{ + // The bound must be of integral type: a floating-point argument has no viable overload. + [[maybe_unused]] auto guarantee = cuda::execution::max_total_num_items(1.5); +} + +int main(int, char**) +{ + test(); + + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.pass.cpp b/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.pass.cpp new file mode 100644 index 00000000000..86081a0d029 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.pass.cpp @@ -0,0 +1,85 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +#include "test_macros.h" + +TEST_FUNC void test() +{ + namespace exec = cuda::execution; + + // (a) static upper bound: the element type is inferred from the non-type template parameter. + { + const auto guarantee = exec::max_total_num_items<1000>(); + using holder_t = cuda::std::remove_cvref_t; + static_assert(cuda::std::is_base_of_v); + static_assert(cuda::std::is_same_v); + static_assert(holder_t::static_highest == 1000); + assert(guarantee.highest() == 1000); + } + + // A bound that does not fit into int infers a wider type, distinguishing 32-bit from 64-bit bounds. + { + const auto guarantee = exec::max_total_num_items<5'000'000'000>(); + using holder_t = cuda::std::remove_cvref_t; + static_assert(sizeof(holder_t::element_type) == 8); + static_assert(holder_t::static_highest == 5'000'000'000); + assert(guarantee.highest() == 5'000'000'000); + } + + // The element type can be selected explicitly through the literal type. + { + const auto guarantee = exec::max_total_num_items(); + using holder_t = cuda::std::remove_cvref_t; + static_assert(cuda::std::is_same_v); + assert(guarantee.highest() == 1000); + } + + // (b) runtime upper bound: the element type is inferred from the argument, the static bound spans the whole type. + { + const auto guarantee = exec::max_total_num_items(cuda::std::int32_t{1'000'000'000}); + using holder_t = cuda::std::remove_cvref_t; + static_assert(cuda::std::is_same_v); + static_assert(holder_t::static_highest == (cuda::std::numeric_limits::max)()); + assert(guarantee.highest() == 1'000'000'000); + } + + // (c) both static and runtime upper bounds; the runtime bound is narrower than the static one. + { + const auto guarantee = exec::max_total_num_items<1000>(500); + using holder_t = cuda::std::remove_cvref_t; + static_assert(holder_t::static_highest == 1000); + assert(guarantee.highest() == 500); + } + + // The query returns the guarantee itself, preserving both the compile-time and the runtime bounds. + { + const auto guarantee = exec::max_total_num_items<1000>(500); + const auto resolved = exec::__get_max_total_num_items(guarantee); + using holder_t = cuda::std::remove_cvref_t; + static_assert(holder_t::static_highest == 1000); + assert(resolved.highest() == 500); + } + + // The query is a forwarding query, just like the requirement queries. + static_assert(cuda::std::execution::forwarding_query(exec::__get_max_total_num_items_t{})); +} + +int main(int, char**) +{ + test(); + + return 0; +}