-
Notifications
You must be signed in to change notification settings - Fork 405
[libcu++] Adds exec::guarantee and the max_total_num_items guarantee
#9278
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,82 @@ | ||||||
| //===----------------------------------------------------------------------===// | ||||||
| // | ||||||
| // Part of libcu++, the C++ Standard Library for your entire system, | ||||||
| // under the Apache License v2.0 with LLVM Exceptions. | ||||||
| // See https://llvm.org/LICENSE.txt for license information. | ||||||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||
| // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. | ||||||
| // | ||||||
| //===----------------------------------------------------------------------===// | ||||||
|
|
||||||
| #ifndef __CUDA___EXECUTION_GUARANTEE_H | ||||||
| #define __CUDA___EXECUTION_GUARANTEE_H | ||||||
|
|
||||||
| #include <cuda/std/detail/__config> | ||||||
|
|
||||||
| #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) | ||||||
| # pragma GCC system_header | ||||||
| #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) | ||||||
| # pragma clang system_header | ||||||
| #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) | ||||||
| # pragma system_header | ||||||
| #endif // no system header | ||||||
|
|
||||||
| #include <cuda/std/__concepts/concept_macros.h> | ||||||
| #include <cuda/std/__execution/env.h> | ||||||
| #include <cuda/std/__type_traits/is_base_of.h> | ||||||
|
|
||||||
| #include <cuda/std/__cccl/prologue.h> | ||||||
|
|
||||||
| _CCCL_BEGIN_NAMESPACE_CUDA_EXECUTION | ||||||
|
|
||||||
| //! @brief Base class of all guarantees that can be passed to @c cuda::execution::guarantee. | ||||||
| //! | ||||||
| //! A guarantee is a promise that the caller makes to an algorithm about its input or the problem being solved (e.g. an | ||||||
| //! upper bound on the total number of items). Algorithms may exploit guarantees to select faster code paths or smaller | ||||||
| //! intermediate types. This is the dual of @c cuda::execution::__requirement, which describes a property that the | ||||||
| //! caller demands from the algorithm. Unlike requirements, guarantees may be stateful, i.e. they may carry a runtime | ||||||
| //! value. | ||||||
| class __guarantee | ||||||
| {}; | ||||||
|
|
||||||
| struct __get_guarantees_t | ||||||
| { | ||||||
| _CCCL_EXEC_CHECK_DISABLE | ||||||
| _CCCL_TEMPLATE(class _Env) | ||||||
| _CCCL_REQUIRES(::cuda::std::execution::__queryable_with<_Env, __get_guarantees_t>) | ||||||
| [[nodiscard]] _CCCL_NODEBUG_API constexpr auto operator()(const _Env& __env) const noexcept | ||||||
| { | ||||||
| static_assert(noexcept(__env.query(*this))); | ||||||
| return __env.query(*this); | ||||||
| } | ||||||
|
|
||||||
| [[nodiscard]] | ||||||
| _CCCL_NODEBUG_API static constexpr auto query(::cuda::std::execution::forwarding_query_t) noexcept -> bool | ||||||
| { | ||||||
| return true; | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What is the significance of |
||||||
| } | ||||||
| }; | ||||||
|
|
||||||
| _CCCL_GLOBAL_CONSTANT auto __get_guarantees = __get_guarantees_t{}; | ||||||
|
|
||||||
| //! @brief Bundles a pack of guarantees into an environment that can be passed to device-wide parallel algorithms. | ||||||
| //! | ||||||
| //! The returned property is keyed by @c __get_guarantees_t so that individual guarantees are only visible to algorithms | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we be documenting I might separate this out of the doxygen comment block so that it's not included in the rendered docs. |
||||||
| //! through the guarantees environment, mirroring how @c cuda::execution::require exposes requirements. Each guarantee | ||||||
| //! is stored by value, preserving any runtime state it carries. | ||||||
| template <class... _Guarantees> | ||||||
| [[nodiscard]] _CCCL_NODEBUG_API auto guarantee(_Guarantees... __guarantees) | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| { | ||||||
| static_assert((::cuda::std::is_base_of_v<__guarantee, _Guarantees> && ...), | ||||||
| "Only guarantees can be passed to guarantee"); | ||||||
|
|
||||||
| ::cuda::std::execution::env<_Guarantees...> __env{__guarantees...}; | ||||||
|
|
||||||
| return ::cuda::std::execution::prop{__get_guarantees_t{}, __env}; | ||||||
| } | ||||||
|
|
||||||
| _CCCL_END_NAMESPACE_CUDA_EXECUTION | ||||||
|
|
||||||
| #include <cuda/std/__cccl/epilogue.h> | ||||||
|
|
||||||
| #endif // __CUDA___EXECUTION_GUARANTEE_H | ||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,140 @@ | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // Part of libcu++, the C++ Standard Library for your entire system, | ||
| // under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef __CUDA___EXECUTION_MAX_TOTAL_NUM_ITEMS_H | ||
| #define __CUDA___EXECUTION_MAX_TOTAL_NUM_ITEMS_H | ||
|
|
||
| #include <cuda/std/detail/__config> | ||
|
|
||
| #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) | ||
| # pragma GCC system_header | ||
| #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) | ||
| # pragma clang system_header | ||
| #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) | ||
| # pragma system_header | ||
| #endif // no system header | ||
|
|
||
| #include <cuda/__execution/guarantee.h> | ||
| #include <cuda/std/__concepts/concept_macros.h> | ||
| #include <cuda/std/__execution/env.h> | ||
| #include <cuda/std/__type_traits/is_integral.h> | ||
| #include <cuda/std/__utility/cmp.h> | ||
| #include <cuda/std/limits> | ||
|
|
||
| #include <cuda/std/__cccl/prologue.h> | ||
|
|
||
| _CCCL_BEGIN_NAMESPACE_CUDA_EXECUTION | ||
|
|
||
| //! @brief Guarantee describing an upper bound on the total number of items processed by an algorithm (e.g. the combined | ||
| //! size of all segments handled by cub::DeviceBatchedTopK). | ||
| //! | ||
| //! The bound is carried as an integral value whose type is inferred from the argument; that type distinguishes, for | ||
| //! example, a 32-bit from a 64-bit bound and lets algorithms size intermediate offset types accordingly. The bound can | ||
| //! be expressed as a compile-time bound (@c static_highest), a runtime bound (@c highest()), or both. A composable | ||
| //! @c min_total_num_items lower-bound guarantee may be added in the future. | ||
| struct __get_max_total_num_items_t; | ||
|
|
||
| template <class _Tp, _Tp _StaticHighest> | ||
| struct _CCCL_DECLSPEC_EMPTY_BASES __max_total_num_items_holder_t : __guarantee | ||
| { | ||
| static_assert(::cuda::std::is_integral_v<_Tp>, "max_total_num_items requires an integral bound type"); | ||
|
|
||
| using element_type = _Tp; | ||
|
|
||
| static constexpr element_type static_highest = _StaticHighest; | ||
|
|
||
| element_type __highest_; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
|
|
||
| //! @brief Returns the effective (runtime) upper bound on the total number of items. | ||
| [[nodiscard]] _CCCL_NODEBUG_API constexpr auto highest() const noexcept -> element_type | ||
| { | ||
| return __highest_; | ||
| } | ||
|
|
||
| [[nodiscard]] _CCCL_NODEBUG_API constexpr auto query(const __get_max_total_num_items_t&) const noexcept | ||
| -> const __max_total_num_items_holder_t& | ||
| { | ||
| return *this; | ||
| } | ||
| }; | ||
|
|
||
| struct __get_max_total_num_items_t | ||
| { | ||
| _CCCL_EXEC_CHECK_DISABLE | ||
| _CCCL_TEMPLATE(class _Env) | ||
| _CCCL_REQUIRES(::cuda::std::execution::__queryable_with<_Env, __get_max_total_num_items_t>) | ||
| [[nodiscard]] _CCCL_NODEBUG_API constexpr auto operator()(const _Env& __env) const noexcept | ||
| { | ||
| static_assert(noexcept(__env.query(*this))); | ||
| return __env.query(*this); | ||
| } | ||
|
|
||
| [[nodiscard]] | ||
| _CCCL_NODEBUG_API static constexpr auto query(::cuda::std::execution::forwarding_query_t) noexcept -> bool | ||
| { | ||
| return true; | ||
| } | ||
| }; | ||
|
|
||
| _CCCL_GLOBAL_CONSTANT auto __get_max_total_num_items = __get_max_total_num_items_t{}; | ||
|
|
||
| //! @brief Creates a guarantee with a compile-time upper bound on the total number of items. | ||
| //! | ||
| //! The bound type is inferred from the non-type template parameter, which must be integral. | ||
| //! | ||
| //! @tparam _Highest Compile-time upper bound on the total number of items. | ||
| //! @return A guarantee that can be passed to @c cuda::execution::guarantee. | ||
| template <auto _Highest> | ||
| [[nodiscard]] _CCCL_NODEBUG_API constexpr auto max_total_num_items() noexcept | ||
| -> __max_total_num_items_holder_t<decltype(_Highest), _Highest> | ||
| { | ||
| static_assert(::cuda::std::is_integral_v<decltype(_Highest)>, "max_total_num_items requires an integral bound"); | ||
| return __max_total_num_items_holder_t<decltype(_Highest), _Highest>{{}, _Highest}; | ||
| } | ||
|
|
||
| //! @brief Creates a guarantee with a runtime upper bound on the total number of items. | ||
| //! | ||
| //! The bound type is inferred from the argument, which must be integral. The compile-time bound spans the whole type. | ||
| //! | ||
| //! @param __highest Runtime upper bound on the total number of items. | ||
| //! @return A guarantee that can be passed to @c cuda::execution::guarantee. | ||
| _CCCL_TEMPLATE(class _Tp) | ||
| _CCCL_REQUIRES(::cuda::std::is_integral_v<_Tp>) | ||
| [[nodiscard]] _CCCL_NODEBUG_API constexpr auto max_total_num_items(_Tp __highest) noexcept | ||
| -> __max_total_num_items_holder_t<_Tp, (::cuda::std::numeric_limits<_Tp>::max)()> | ||
| { | ||
| return __max_total_num_items_holder_t<_Tp, (::cuda::std::numeric_limits<_Tp>::max)()>{{}, __highest}; | ||
| } | ||
|
|
||
| //! @brief Creates a guarantee with both a compile-time and a runtime upper bound on the total number of items. | ||
| //! | ||
| //! The bound type is inferred from the non-type template parameter. The runtime bound must not exceed the compile-time | ||
| //! bound. | ||
| //! | ||
| //! @tparam _Highest Compile-time upper bound on the total number of items. | ||
| //! @param __highest Runtime upper bound on the total number of items, must be `<= _Highest`. | ||
| //! @return A guarantee that can be passed to @c cuda::execution::guarantee. | ||
| template <auto _Highest, class _Tp> | ||
| [[nodiscard]] _CCCL_NODEBUG_API constexpr auto max_total_num_items(_Tp __highest) noexcept | ||
| -> __max_total_num_items_holder_t<decltype(_Highest), _Highest> | ||
| { | ||
| static_assert(::cuda::std::is_integral_v<decltype(_Highest)>, | ||
| "max_total_num_items requires an integral static bound"); | ||
| static_assert(::cuda::std::is_integral_v<_Tp>, "max_total_num_items requires an integral runtime bound"); | ||
| _CCCL_ASSERT(::cuda::std::cmp_less_equal(__highest, _Highest), | ||
| "max_total_num_items: the runtime bound must not exceed the static bound"); | ||
| return __max_total_num_items_holder_t<decltype(_Highest), _Highest>{{}, static_cast<decltype(_Highest)>(__highest)}; | ||
| } | ||
|
|
||
| _CCCL_END_NAMESPACE_CUDA_EXECUTION | ||
|
|
||
| #include <cuda/std/__cccl/epilogue.h> | ||
|
|
||
| #endif // __CUDA___EXECUTION_MAX_TOTAL_NUM_ITEMS_H | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // Part of libcu++, the C++ Standard Library for your entire system, | ||
| // under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef _CUDA_EXECUTION_EXECUTION_GUARANTEE_H | ||
| #define _CUDA_EXECUTION_EXECUTION_GUARANTEE_H | ||
|
|
||
| #include <cuda/std/detail/__config> | ||
|
|
||
| #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) | ||
| # pragma GCC system_header | ||
| #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) | ||
| # pragma clang system_header | ||
| #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) | ||
| # pragma system_header | ||
| #endif // no system header | ||
|
|
||
| #include <cuda/__execution/guarantee.h> | ||
|
|
||
| #endif // _CUDA_EXECUTION_EXECUTION_GUARANTEE_H |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // Part of libcu++, the C++ Standard Library for your entire system, | ||
| // under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef _CUDA_EXECUTION_EXECUTION_MAX_TOTAL_NUM_ITEMS_H | ||
| #define _CUDA_EXECUTION_EXECUTION_MAX_TOTAL_NUM_ITEMS_H | ||
|
|
||
| #include <cuda/std/detail/__config> | ||
|
|
||
| #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) | ||
| # pragma GCC system_header | ||
| #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) | ||
| # pragma clang system_header | ||
| #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) | ||
| # pragma system_header | ||
| #endif // no system header | ||
|
|
||
| #include <cuda/__execution/max_total_num_items.h> | ||
|
|
||
| #endif // _CUDA_EXECUTION_EXECUTION_MAX_TOTAL_NUM_ITEMS_H |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // Part of libcu++, the C++ Standard Library for your entire system, | ||
| // under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include <cuda/execution.guarantee.h> | ||
|
|
||
| #include "test_macros.h" | ||
|
|
||
| [[maybe_unused]] _CCCL_GLOBAL_CONSTANT struct query_t | ||
| { | ||
| } query{}; | ||
|
|
||
| TEST_FUNC void test() | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
| { | ||
| // not every environment is a guarantee | ||
| cuda::std::execution::prop p{query, 42}; | ||
| cuda::execution::guarantee(p); | ||
| } | ||
|
|
||
| int main(int, char**) | ||
| { | ||
| test(); | ||
|
|
||
| return 0; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // Part of libcu++, the C++ Standard Library for your entire system, | ||
| // under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include <cuda/execution.guarantee.h> | ||
| #include <cuda/execution.max_total_num_items.h> | ||
| #include <cuda/std/cassert> | ||
| #include <cuda/std/cstdint> | ||
| #include <cuda/std/type_traits> | ||
|
|
||
| #include "test_macros.h" | ||
|
|
||
| TEST_FUNC void test() | ||
| { | ||
| namespace exec = cuda::execution; | ||
|
|
||
| // A guarantee is only visible to an algorithm through the guarantees environment produced by guarantee(...), | ||
| // mirroring how requirements are only visible through the requirements environment produced by require(...). | ||
| const auto genv = exec::guarantee(exec::max_total_num_items<1000>()); | ||
| const auto resolved = exec::__get_max_total_num_items(exec::__get_guarantees(genv)); | ||
| static_assert(cuda::std::is_base_of_v<exec::__guarantee, cuda::std::remove_cvref_t<decltype(resolved)>>); | ||
| assert(resolved.highest() == 1000); | ||
|
|
||
| // The guarantees query is a forwarding query, just like the requirements query. | ||
| static_assert(cuda::std::execution::forwarding_query(exec::__get_guarantees_t{})); | ||
| } | ||
|
|
||
| int main(int, char**) | ||
| { | ||
| test(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should also test in |
||
|
|
||
| return 0; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // Part of libcu++, the C++ Standard Library for your entire system, | ||
| // under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include <cuda/execution.max_total_num_items.h> | ||
|
|
||
| #include "test_macros.h" | ||
|
|
||
| TEST_FUNC void test() | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
| { | ||
| // The bound must be of integral type: a floating-point argument has no viable overload. | ||
| [[maybe_unused]] auto guarantee = cuda::execution::max_total_num_items(1.5); | ||
| } | ||
|
|
||
| int main(int, char**) | ||
| { | ||
| test(); | ||
|
|
||
| return 0; | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Alternatively just `noexcept(noexcept(__env.query(*this)))` in the signature.