From ac143a8ca68f86768723e08c56e935681c37aaef Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Fri, 5 Jun 2026 01:38:42 -0700 Subject: [PATCH 1/4] adds max_total_num_items guarantee --- .../include/cuda/__execution/guarantee.h | 81 ++++++++++ .../cuda/__execution/max_total_num_items.h | 140 ++++++++++++++++++ libcudacxx/include/cuda/execution | 2 + libcudacxx/include/cuda/execution.guarantee.h | 26 ++++ .../cuda/execution.max_total_num_items.h | 26 ++++ .../cuda/execution/guarantee.fail.cpp | 29 ++++ .../cuda/execution/guarantee.pass.cpp | 40 +++++ .../execution/max_total_num_items.fail.cpp | 24 +++ .../execution/max_total_num_items.pass.cpp | 84 +++++++++++ 9 files changed, 452 insertions(+) create mode 100644 libcudacxx/include/cuda/__execution/guarantee.h create mode 100644 libcudacxx/include/cuda/__execution/max_total_num_items.h create mode 100644 libcudacxx/include/cuda/execution.guarantee.h create mode 100644 libcudacxx/include/cuda/execution.max_total_num_items.h create mode 100644 libcudacxx/test/libcudacxx/cuda/execution/guarantee.fail.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.fail.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.pass.cpp diff --git a/libcudacxx/include/cuda/__execution/guarantee.h b/libcudacxx/include/cuda/__execution/guarantee.h new file mode 100644 index 00000000000..485a096ff96 --- /dev/null +++ b/libcudacxx/include/cuda/__execution/guarantee.h @@ -0,0 +1,81 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef __CUDA___EXECUTION_GUARANTEE_H +#define __CUDA___EXECUTION_GUARANTEE_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +#include + +_CCCL_BEGIN_NAMESPACE_CUDA_EXECUTION + +//! @brief Base class of all guarantees that can be passed to @c cuda::execution::guarantee. +//! +//! A guarantee is a promise that the caller makes to an algorithm about its input or the problem being solved (e.g. an +//! upper bound on the total number of items). Algorithms may exploit guarantees to select faster code paths or smaller +//! intermediate types. This is the dual of @c cuda::execution::__requirement, which describes a property that the caller +//! demands from the algorithm. Unlike requirements, guarantees may be stateful, i.e. they may carry a runtime value. +class __guarantee +{}; + +struct __get_guarantees_t +{ + _CCCL_EXEC_CHECK_DISABLE + _CCCL_TEMPLATE(class _Env) + _CCCL_REQUIRES(::cuda::std::execution::__queryable_with<_Env, __get_guarantees_t>) + [[nodiscard]] _CCCL_NODEBUG_API constexpr auto operator()(const _Env& __env) const noexcept + { + static_assert(noexcept(__env.query(*this))); + return __env.query(*this); + } + + [[nodiscard]] + _CCCL_NODEBUG_API static constexpr auto query(::cuda::std::execution::forwarding_query_t) noexcept -> bool + { + return true; + } +}; + +_CCCL_GLOBAL_CONSTANT auto __get_guarantees = __get_guarantees_t{}; + +//! @brief Bundles a pack of guarantees into an environment that can be passed to device-wide parallel algorithms. +//! +//! The returned property is keyed by @c __get_guarantees_t so that individual guarantees are only visible to algorithms +//! through the guarantees environment, mirroring how @c cuda::execution::require exposes requirements. Each guarantee is +//! stored by value, preserving any runtime state it carries. +template +[[nodiscard]] _CCCL_NODEBUG_API auto guarantee(_Guarantees... __guarantees) +{ + static_assert((::cuda::std::is_base_of_v<__guarantee, _Guarantees> && ...), + "Only guarantees can be passed to guarantee"); + + ::cuda::std::execution::env<_Guarantees...> __env{__guarantees...}; + + return ::cuda::std::execution::prop{__get_guarantees_t{}, __env}; +} + +_CCCL_END_NAMESPACE_CUDA_EXECUTION + +#include + +#endif // __CUDA___EXECUTION_GUARANTEE_H diff --git a/libcudacxx/include/cuda/__execution/max_total_num_items.h b/libcudacxx/include/cuda/__execution/max_total_num_items.h new file mode 100644 index 00000000000..23df63a540c --- /dev/null +++ b/libcudacxx/include/cuda/__execution/max_total_num_items.h @@ -0,0 +1,140 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef __CUDA___EXECUTION_MAX_TOTAL_NUM_ITEMS_H +#define __CUDA___EXECUTION_MAX_TOTAL_NUM_ITEMS_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include +#include +#include + +#include + +_CCCL_BEGIN_NAMESPACE_CUDA_EXECUTION + +//! @brief Guarantee describing an upper bound on the total number of items processed by an algorithm (e.g. the combined +//! size of all segments handled by cub::DeviceBatchedTopK). +//! +//! The bound is carried as an integral value whose type is inferred from the argument; that type distinguishes, for +//! example, a 32-bit from a 64-bit bound and lets algorithms size intermediate offset types accordingly. The bound can +//! be expressed as a compile-time bound (@c static_highest), a runtime bound (@c highest()), or both. A composable +//! @c min_total_num_items lower-bound guarantee may be added in the future. +struct __get_max_total_num_items_t; + +template +struct _CCCL_DECLSPEC_EMPTY_BASES __max_total_num_items_holder_t : __guarantee +{ + static_assert(::cuda::std::is_integral_v<_Tp>, "max_total_num_items requires an integral bound type"); + + using element_type = _Tp; + + static constexpr element_type static_highest = _StaticHighest; + + element_type __highest_; + + //! @brief Returns the effective (runtime) upper bound on the total number of items. + [[nodiscard]] _CCCL_NODEBUG_API constexpr auto highest() const noexcept -> element_type + { + return __highest_; + } + + [[nodiscard]] _CCCL_NODEBUG_API constexpr auto query(const __get_max_total_num_items_t&) const noexcept + -> const __max_total_num_items_holder_t& + { + return *this; + } +}; + +struct __get_max_total_num_items_t +{ + _CCCL_EXEC_CHECK_DISABLE + _CCCL_TEMPLATE(class _Env) + _CCCL_REQUIRES(::cuda::std::execution::__queryable_with<_Env, __get_max_total_num_items_t>) + [[nodiscard]] _CCCL_NODEBUG_API constexpr auto operator()(const _Env& __env) const noexcept + { + static_assert(noexcept(__env.query(*this))); + return __env.query(*this); + } + + [[nodiscard]] + _CCCL_NODEBUG_API static constexpr auto query(::cuda::std::execution::forwarding_query_t) noexcept -> bool + { + return true; + } +}; + +_CCCL_GLOBAL_CONSTANT auto __get_max_total_num_items = __get_max_total_num_items_t{}; + +//! @brief Creates a guarantee with a compile-time upper bound on the total number of items. +//! +//! The bound type is inferred from the non-type template parameter, which must be integral. +//! +//! @tparam _Highest Compile-time upper bound on the total number of items. +//! @return A guarantee that can be passed to @c cuda::execution::guarantee. +template +[[nodiscard]] _CCCL_NODEBUG_API constexpr auto max_total_num_items() noexcept + -> __max_total_num_items_holder_t +{ + static_assert(::cuda::std::is_integral_v, "max_total_num_items requires an integral bound"); + return __max_total_num_items_holder_t{{}, _Highest}; +} + +//! @brief Creates a guarantee with a runtime upper bound on the total number of items. +//! +//! The bound type is inferred from the argument, which must be integral. The compile-time bound spans the whole type. +//! +//! @param __highest Runtime upper bound on the total number of items. +//! @return A guarantee that can be passed to @c cuda::execution::guarantee. +_CCCL_TEMPLATE(class _Tp) +_CCCL_REQUIRES(::cuda::std::is_integral_v<_Tp>) +[[nodiscard]] _CCCL_NODEBUG_API constexpr auto max_total_num_items(_Tp __highest) noexcept + -> __max_total_num_items_holder_t<_Tp, (::cuda::std::numeric_limits<_Tp>::max)()> +{ + return __max_total_num_items_holder_t<_Tp, (::cuda::std::numeric_limits<_Tp>::max)()>{{}, __highest}; +} + +//! @brief Creates a guarantee with both a compile-time and a runtime upper bound on the total number of items. +//! +//! The bound type is inferred from the non-type template parameter. The runtime bound must not exceed the compile-time +//! bound. +//! +//! @tparam _Highest Compile-time upper bound on the total number of items. +//! @param __highest Runtime upper bound on the total number of items, must be `<= _Highest`. +//! @return A guarantee that can be passed to @c cuda::execution::guarantee. +template +[[nodiscard]] _CCCL_NODEBUG_API constexpr auto max_total_num_items(_Tp __highest) noexcept + -> __max_total_num_items_holder_t +{ + static_assert(::cuda::std::is_integral_v, + "max_total_num_items requires an integral static bound"); + static_assert(::cuda::std::is_integral_v<_Tp>, "max_total_num_items requires an integral runtime bound"); + _CCCL_ASSERT(::cuda::std::cmp_less_equal(__highest, _Highest), + "max_total_num_items: the runtime bound must not exceed the static bound"); + return __max_total_num_items_holder_t{{}, static_cast(__highest)}; +} + +_CCCL_END_NAMESPACE_CUDA_EXECUTION + +#include + +#endif // __CUDA___EXECUTION_MAX_TOTAL_NUM_ITEMS_H diff --git a/libcudacxx/include/cuda/execution b/libcudacxx/include/cuda/execution index dfb698bcb4b..c9fafee22ea 100644 --- a/libcudacxx/include/cuda/execution +++ b/libcudacxx/include/cuda/execution @@ -22,6 +22,8 @@ #endif // no system header #include +#include +#include #include #include #include diff --git a/libcudacxx/include/cuda/execution.guarantee.h b/libcudacxx/include/cuda/execution.guarantee.h new file mode 100644 index 00000000000..3f891675c34 --- /dev/null +++ b/libcudacxx/include/cuda/execution.guarantee.h @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA_EXECUTION_EXECUTION_GUARANTEE_H +#define _CUDA_EXECUTION_EXECUTION_GUARANTEE_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include + +#endif // _CUDA_EXECUTION_EXECUTION_GUARANTEE_H diff --git a/libcudacxx/include/cuda/execution.max_total_num_items.h b/libcudacxx/include/cuda/execution.max_total_num_items.h new file mode 100644 index 00000000000..8e0570d4cfd --- /dev/null +++ b/libcudacxx/include/cuda/execution.max_total_num_items.h @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA_EXECUTION_EXECUTION_MAX_TOTAL_NUM_ITEMS_H +#define _CUDA_EXECUTION_EXECUTION_MAX_TOTAL_NUM_ITEMS_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include + +#endif // _CUDA_EXECUTION_EXECUTION_MAX_TOTAL_NUM_ITEMS_H diff --git a/libcudacxx/test/libcudacxx/cuda/execution/guarantee.fail.cpp b/libcudacxx/test/libcudacxx/cuda/execution/guarantee.fail.cpp new file mode 100644 index 00000000000..b1301ec12e2 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/execution/guarantee.fail.cpp @@ -0,0 +1,29 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include + +[[maybe_unused]] _CCCL_GLOBAL_CONSTANT struct query_t +{ +} query{}; + +TEST_FUNC void test() +{ + // not every environment is a guarantee + cuda::std::execution::prop p{query, 42}; + cuda::execution::guarantee(p); +} + +int main(int, char**) +{ + test(); + + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp b/libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp new file mode 100644 index 00000000000..8eb43d3baee --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp @@ -0,0 +1,40 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include +#include +#include + +#include "test_macros.h" + +TEST_FUNC void test() +{ + namespace exec = cuda::execution; + + // A guarantee is only visible to an algorithm through the guarantees environment produced by guarantee(...), mirroring + // how requirements are only visible through the requirements environment produced by require(...). + const auto genv = exec::guarantee(exec::max_total_num_items<1000>()); + const auto resolved = exec::__get_max_total_num_items(exec::__get_guarantees(genv)); + static_assert(cuda::std::is_base_of_v>); + assert(resolved.highest() == 1000); + + // The guarantees query is a forwarding query, just like the requirements query. + static_assert(cuda::std::execution::forwarding_query(exec::__get_guarantees_t{})); +} + +int main(int, char**) +{ + test(); + + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.fail.cpp b/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.fail.cpp new file mode 100644 index 00000000000..d9c4d398fc2 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.fail.cpp @@ -0,0 +1,24 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include + +TEST_FUNC void test() +{ + // The bound must be of integral type: a floating-point argument has no viable overload. + [[maybe_unused]] auto guarantee = cuda::execution::max_total_num_items(1.5); +} + +int main(int, char**) +{ + test(); + + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.pass.cpp b/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.pass.cpp new file mode 100644 index 00000000000..e69c2a2b952 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.pass.cpp @@ -0,0 +1,84 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include + +#include +#include +#include +#include + +#include "test_macros.h" + +TEST_FUNC void test() +{ + namespace exec = cuda::execution; + + // (a) static upper bound: the element type is inferred from the non-type template parameter. + { + const auto guarantee = exec::max_total_num_items<1000>(); + using holder_t = cuda::std::remove_cvref_t; + static_assert(cuda::std::is_base_of_v); + static_assert(cuda::std::is_same_v); + static_assert(holder_t::static_highest == 1000); + assert(guarantee.highest() == 1000); + } + + // A bound that does not fit into int infers a wider type, distinguishing 32-bit from 64-bit bounds. + { + const auto guarantee = exec::max_total_num_items<5'000'000'000>(); + using holder_t = cuda::std::remove_cvref_t; + static_assert(sizeof(holder_t::element_type) == 8); + static_assert(holder_t::static_highest == 5'000'000'000); + } + + // The element type can be selected explicitly through the literal type. + { + const auto guarantee = exec::max_total_num_items(); + using holder_t = cuda::std::remove_cvref_t; + static_assert(cuda::std::is_same_v); + } + + // (b) runtime upper bound: the element type is inferred from the argument, the static bound spans the whole type. + { + const auto guarantee = exec::max_total_num_items(cuda::std::int32_t{1'000'000'000}); + using holder_t = cuda::std::remove_cvref_t; + static_assert(cuda::std::is_same_v); + static_assert(holder_t::static_highest == (cuda::std::numeric_limits::max)()); + assert(guarantee.highest() == 1'000'000'000); + } + + // (c) both static and runtime upper bounds; the runtime bound is narrower than the static one. + { + const auto guarantee = exec::max_total_num_items<1000>(500); + using holder_t = cuda::std::remove_cvref_t; + static_assert(holder_t::static_highest == 1000); + assert(guarantee.highest() == 500); + } + + // The query returns the guarantee itself, preserving both the compile-time and the runtime bounds. + { + const auto guarantee = exec::max_total_num_items<1000>(500); + const auto resolved = exec::__get_max_total_num_items(guarantee); + using holder_t = cuda::std::remove_cvref_t; + static_assert(holder_t::static_highest == 1000); + assert(resolved.highest() == 500); + } + + // The query is a forwarding query, just like the requirement queries. + static_assert(cuda::std::execution::forwarding_query(exec::__get_max_total_num_items_t{})); +} + +int main(int, char**) +{ + test(); + + return 0; +} From 90b75814fa6843af20e014af2abb112cd0b58ba1 Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Fri, 5 Jun 2026 01:53:59 -0700 Subject: [PATCH 2/4] fixes format --- libcudacxx/include/cuda/__execution/guarantee.h | 9 +++++---- .../test/libcudacxx/cuda/execution/guarantee.pass.cpp | 5 ++--- .../cuda/execution/max_total_num_items.pass.cpp | 1 - 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/libcudacxx/include/cuda/__execution/guarantee.h b/libcudacxx/include/cuda/__execution/guarantee.h index 485a096ff96..03f99da8757 100644 --- a/libcudacxx/include/cuda/__execution/guarantee.h +++ b/libcudacxx/include/cuda/__execution/guarantee.h @@ -33,8 +33,9 @@ _CCCL_BEGIN_NAMESPACE_CUDA_EXECUTION //! //! A guarantee is a promise that the caller makes to an algorithm about its input or the problem being solved (e.g. an //! upper bound on the total number of items). Algorithms may exploit guarantees to select faster code paths or smaller -//! intermediate types. This is the dual of @c cuda::execution::__requirement, which describes a property that the caller -//! demands from the algorithm. Unlike requirements, guarantees may be stateful, i.e. they may carry a runtime value. +//! intermediate types. This is the dual of @c cuda::execution::__requirement, which describes a property that the +//! caller demands from the algorithm. Unlike requirements, guarantees may be stateful, i.e. they may carry a runtime +//! value. class __guarantee {}; @@ -61,8 +62,8 @@ _CCCL_GLOBAL_CONSTANT auto __get_guarantees = __get_guarantees_t{}; //! @brief Bundles a pack of guarantees into an environment that can be passed to device-wide parallel algorithms. //! //! The returned property is keyed by @c __get_guarantees_t so that individual guarantees are only visible to algorithms -//! through the guarantees environment, mirroring how @c cuda::execution::require exposes requirements. Each guarantee is -//! stored by value, preserving any runtime state it carries. +//! through the guarantees environment, mirroring how @c cuda::execution::require exposes requirements. Each guarantee +//! is stored by value, preserving any runtime state it carries. template [[nodiscard]] _CCCL_NODEBUG_API auto guarantee(_Guarantees... __guarantees) { diff --git a/libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp b/libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp index 8eb43d3baee..0d67e6bec4d 100644 --- a/libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/execution/guarantee.pass.cpp @@ -10,7 +10,6 @@ #include #include - #include #include #include @@ -21,8 +20,8 @@ TEST_FUNC void test() { namespace exec = cuda::execution; - // A guarantee is only visible to an algorithm through the guarantees environment produced by guarantee(...), mirroring - // how requirements are only visible through the requirements environment produced by require(...). + // A guarantee is only visible to an algorithm through the guarantees environment produced by guarantee(...), + // mirroring how requirements are only visible through the requirements environment produced by require(...). const auto genv = exec::guarantee(exec::max_total_num_items<1000>()); const auto resolved = exec::__get_max_total_num_items(exec::__get_guarantees(genv)); static_assert(cuda::std::is_base_of_v>); diff --git a/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.pass.cpp b/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.pass.cpp index e69c2a2b952..8220878dccb 100644 --- a/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.pass.cpp @@ -9,7 +9,6 @@ //===----------------------------------------------------------------------===// #include - #include #include #include From 38ff9c881557f7eceb839816abd62806b124b14f Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Fri, 5 Jun 2026 02:08:45 -0700 Subject: [PATCH 3/4] includes test_macros header --- libcudacxx/test/libcudacxx/cuda/execution/guarantee.fail.cpp | 2 ++ .../test/libcudacxx/cuda/execution/max_total_num_items.fail.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/libcudacxx/test/libcudacxx/cuda/execution/guarantee.fail.cpp b/libcudacxx/test/libcudacxx/cuda/execution/guarantee.fail.cpp index b1301ec12e2..a6174f5eb4a 100644 --- a/libcudacxx/test/libcudacxx/cuda/execution/guarantee.fail.cpp +++ b/libcudacxx/test/libcudacxx/cuda/execution/guarantee.fail.cpp @@ -10,6 +10,8 @@ #include +#include "test_macros.h" + [[maybe_unused]] _CCCL_GLOBAL_CONSTANT struct query_t { } query{}; diff --git a/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.fail.cpp b/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.fail.cpp index d9c4d398fc2..baffa2de37b 100644 --- a/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.fail.cpp +++ b/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.fail.cpp @@ -10,6 +10,8 @@ #include +#include "test_macros.h" + TEST_FUNC void test() { // The bound must be of integral type: a floating-point argument has no viable overload. From 1a1e06b4230f6a5bf21575f7a36324a5faa68820 Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Sat, 6 Jun 2026 22:36:29 -0700 Subject: [PATCH 4/4] fixes unuser warning --- .../test/libcudacxx/cuda/execution/max_total_num_items.pass.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.pass.cpp b/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.pass.cpp index 8220878dccb..86081a0d029 100644 --- a/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/execution/max_total_num_items.pass.cpp @@ -36,6 +36,7 @@ TEST_FUNC void test() using holder_t = cuda::std::remove_cvref_t; static_assert(sizeof(holder_t::element_type) == 8); static_assert(holder_t::static_highest == 5'000'000'000); + assert(guarantee.highest() == 5'000'000'000); } // The element type can be selected explicitly through the literal type. @@ -43,6 +44,7 @@ TEST_FUNC void test() const auto guarantee = exec::max_total_num_items(); using holder_t = cuda::std::remove_cvref_t; static_assert(cuda::std::is_same_v); + assert(guarantee.highest() == 1000); } // (b) runtime upper bound: the element type is inferred from the argument, the static bound spans the whole type.