Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cub/cub/agent/agent_batched_topk.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ struct agent_batched_topk_worker_per_segment
multi_worker_per_segment_policy.threads_per_block * multi_worker_per_segment_policy.items_per_thread;

// Check if there could be large segments present
static constexpr bool only_small_segments = ::cuda::__argument::__traits<SegmentSizeParameterT>::max <= tile_size;
static constexpr bool only_small_segments = ::cuda::__argument::__traits<SegmentSizeParameterT>::highest <= tile_size;

// Check if we are dealing with keys-only or key-value pairs
static constexpr bool is_keys_only = ::cuda::std::is_same_v<value_t, cub::NullType>;
Expand Down Expand Up @@ -224,7 +224,7 @@ struct agent_batched_topk_worker_per_segment
const key_t padding_key =
(direction == detail::topk::select::max)
? ::cuda::std::numeric_limits<key_t>::lowest()
: ::cuda::std::numeric_limits<key_t>::max();
: (::cuda::std::numeric_limits<key_t>::max)();

// Dereference iterator-of-iterators to get the segment specific iterator
auto block_keys_in = d_key_segments_it[segment_id];
Expand Down
6 changes: 3 additions & 3 deletions cub/cub/device/dispatch/dispatch_batched_topk.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ template <typename KeyInputItItT,
typename PolicySelector = policy_selector_from_types<it_value_t<it_value_t<KeyInputItItT>>,
it_value_t<it_value_t<ValueInputItItT>>,
::cuda::std::int64_t,
::cuda::__argument::__traits<KParameterT>::max>>
::cuda::__argument::__traits<KParameterT>::highest>>
#if _CCCL_HAS_CONCEPTS()
requires batched_topk_policy_selector<PolicySelector>
#endif // _CCCL_HAS_CONCEPTS()
Expand Down Expand Up @@ -173,7 +173,7 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t dispatch(
static constexpr bool any_small_segments =
::cuda::__argument::__traits<SegmentSizeParameterT>::lowest <= worker_per_segment_tile_size;
static constexpr bool only_small_segments =
::cuda::__argument::__traits<SegmentSizeParameterT>::max <= worker_per_segment_tile_size;
::cuda::__argument::__traits<SegmentSizeParameterT>::highest <= worker_per_segment_tile_size;

// Allocation layout:
// only_small_segments: [0] dummy.
Expand Down Expand Up @@ -341,7 +341,7 @@ template <typename KeyInputItItT,
policy_selector_from_types<it_value_t<it_value_t<KeyInputItItT>>,
it_value_t<it_value_t<ValueInputItItT>>,
::cuda::std::int64_t,
::cuda::__argument::__traits<KParameterT>::max>;
::cuda::__argument::__traits<KParameterT>::highest>;
return detail::dispatch_with_env_and_tuning<default_policy_selector>(
env, [&](auto policy_selector, void* d_temp_storage, size_t& temp_storage_bytes, cudaStream_t stream) {
return dispatch(
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/kernels/kernel_batched_topk.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ private:
worker_policy worker_per_segment_policy;
multi_worker_policy multi_worker_per_segment_policy;
};
static constexpr ::cuda::std::int64_t max_segment_size = ::cuda::__argument::__traits<SegmentSizeParameterT>::max;
static constexpr ::cuda::std::int64_t max_segment_size = ::cuda::__argument::__traits<SegmentSizeParameterT>::highest;
static constexpr batched_topk_policy active_policy = current_policy<PolicySelector>();

template <int Index>
Expand Down Expand Up @@ -151,7 +151,7 @@ __launch_bounds__(int(
LargeSegmentTileOffsetT>::agent_t;

// Static Assertions (Constraints)
static_assert(agent_t::tile_size >= ::cuda::__argument::__traits<SegmentSizeParameterT>::max,
static_assert(agent_t::tile_size >= ::cuda::__argument::__traits<SegmentSizeParameterT>::highest,
"Block size exceeds maximum segment size supported by SegmentSizeParameterT");
static_assert(sizeof(typename agent_t::TempStorage) <= max_smem_per_block,
"Static shared memory per block must not exceed 48KB limit.");
Expand Down
133 changes: 67 additions & 66 deletions libcudacxx/include/cuda/__argument/argument.h

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion libcudacxx/include/cuda/__argument/argument_bounds.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ template <class _Tp>
struct __runtime_bounds
{
_Tp __lower_ = ::cuda::std::numeric_limits<_Tp>::lowest();
_Tp __upper_ = ::cuda::std::numeric_limits<_Tp>::max();
_Tp __upper_ = (::cuda::std::numeric_limits<_Tp>::max)();

constexpr __runtime_bounds() noexcept = default;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,36 +137,37 @@ TEST_FUNC void test()
cuda::std::array<int, 3>>);
#endif // TEST_HAS_CLASS_NTTP

// --- argument_traits: lowest / max ---
// --- argument_traits: lowest / highest ---

static_assert(cuda::__argument::__traits<int>::lowest == cuda::std::numeric_limits<int>::lowest());
static_assert(cuda::__argument::__traits<int>::max == cuda::std::numeric_limits<int>::max());
static_assert(cuda::__argument::__traits<int>::highest == (cuda::std::numeric_limits<int>::max)());
static_assert(cuda::__argument::__traits<const int>::lowest == cuda::std::numeric_limits<int>::lowest());
static_assert(cuda::__argument::__traits<int&>::max == cuda::std::numeric_limits<int>::max());
static_assert(cuda::__argument::__traits<int&>::highest == (cuda::std::numeric_limits<int>::max)());
static_assert(cuda::__argument::__traits<float>::lowest == cuda::std::numeric_limits<float>::lowest());
static_assert(cuda::__argument::__traits<float>::max == cuda::std::numeric_limits<float>::max());
static_assert(cuda::__argument::__traits<float>::highest == (cuda::std::numeric_limits<float>::max)());
static_assert(
cuda::__argument::__traits<const cuda::__argument::__immediate<int, cuda::__argument::__static_bounds<1, 8>>>::lowest
== 1);
static_assert(
cuda::__argument::__traits<cuda::__argument::__immediate<int, cuda::__argument::__static_bounds<1, 8>>&>::max == 8);
cuda::__argument::__traits<cuda::__argument::__immediate<int, cuda::__argument::__static_bounds<1, 8>>&>::highest
== 8);
static_assert(
cuda::__argument::__traits<
cuda::__argument::__immediate_sequence<cuda::std::span<int>, cuda::__argument::__static_bounds<1, 8>>>::max
cuda::__argument::__immediate_sequence<cuda::std::span<int>, cuda::__argument::__static_bounds<1, 8>>>::highest
== 8);
#if TEST_HAS_CLASS_NTTP
static_assert(
cuda::__argument::__traits<cuda::__argument::__constant_sequence<cuda::std::array<int, 3>{3, 1, 2}>>::lowest == 1);
static_assert(
cuda::__argument::__traits<cuda::__argument::__constant_sequence<cuda::std::array<int, 3>{3, 1, 2}>>::max == 3);
cuda::__argument::__traits<cuda::__argument::__constant_sequence<cuda::std::array<int, 3>{3, 1, 2}>>::highest == 3);
#endif // TEST_HAS_CLASS_NTTP

// --- Free function bounds on plain values ---

static_assert(cuda::__argument::__lowest_(42) == cuda::std::numeric_limits<int>::lowest());
static_assert(cuda::__argument::__max_(42) == cuda::std::numeric_limits<int>::max());
static_assert(cuda::__argument::__highest_(42) == (cuda::std::numeric_limits<int>::max)());
static_assert(cuda::__argument::__lowest_(1.0f) == cuda::std::numeric_limits<float>::lowest());
static_assert(cuda::__argument::__max_(1.0f) == cuda::std::numeric_limits<float>::max());
static_assert(cuda::__argument::__highest_(1.0f) == (cuda::std::numeric_limits<float>::max)());

// --- Scalar and sequence wrappers expose distinct single-value traits ---

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ TEST_FUNC constexpr bool test()
auto def = cuda::__argument::__deferred{cuda::std::span<int, 1>{&val, 1}};
assert(cuda::__argument::__unwrap(def)[0] == 42);
static_assert(cuda::__argument::__traits<decltype(def)>::lowest == cuda::std::numeric_limits<int>::lowest());
static_assert(cuda::__argument::__traits<decltype(def)>::max == cuda::std::numeric_limits<int>::max());
static_assert(cuda::__argument::__traits<decltype(def)>::highest == (cuda::std::numeric_limits<int>::max)());
}

// Deferred single value with static bounds
Expand All @@ -34,15 +34,15 @@ TEST_FUNC constexpr bool test()
auto def = cuda::__argument::__deferred{cuda::std::span<int, 1>{&val, 1}, cuda::__argument::__bounds<1, 1000>()};
assert(cuda::__argument::__unwrap(def)[0] == 42);
static_assert(cuda::__argument::__traits<decltype(def)>::lowest == 1);
static_assert(cuda::__argument::__traits<decltype(def)>::max == 1000);
static_assert(cuda::__argument::__traits<decltype(def)>::highest == 1000);
}

// Deferred single value via pointer
{
int val = 42;
using def_t = cuda::__argument::__deferred<int*, cuda::__argument::__static_bounds<0, 100>>;
static_assert(cuda::__argument::__traits<def_t>::lowest == 0);
static_assert(cuda::__argument::__traits<def_t>::max == 100);
static_assert(cuda::__argument::__traits<def_t>::highest == 100);
// Also verify construction works
auto def = cuda::__argument::__deferred{&val, cuda::__argument::__bounds<0, 100>()};
assert(cuda::__argument::__unwrap(def) == &val);
Expand All @@ -54,7 +54,7 @@ TEST_FUNC constexpr bool test()
auto def = cuda::__argument::__deferred{it, cuda::__argument::__bounds<0, 100>()};
assert(cuda::__argument::__unwrap(def)[0] == 42);
static_assert(cuda::__argument::__traits<decltype(def)>::lowest == 0);
static_assert(cuda::__argument::__traits<decltype(def)>::max == 100);
static_assert(cuda::__argument::__traits<decltype(def)>::highest == 100);
static_assert(cuda::__argument::__traits<decltype(def)>::is_single_value);
}

Expand All @@ -64,9 +64,9 @@ TEST_FUNC constexpr bool test()
auto def = cuda::__argument::__deferred{
cuda::std::span<int, 1>{&val, 1}, cuda::__argument::__bounds(5, 100), cuda::__argument::__bounds<1, 256>()};
static_assert(cuda::__argument::__traits<decltype(def)>::lowest == 1);
static_assert(cuda::__argument::__traits<decltype(def)>::max == 256);
static_assert(cuda::__argument::__traits<decltype(def)>::highest == 256);
assert(cuda::__argument::__lowest_(def) == 5);
assert(cuda::__argument::__max_(def) == 100);
assert(cuda::__argument::__highest_(def) == 100);
}

// Deferred sequence via fancy iterator
Expand All @@ -76,7 +76,7 @@ TEST_FUNC constexpr bool test()
assert(cuda::__argument::__unwrap(def)[0] == 10);
assert(cuda::__argument::__unwrap(def)[2] == 12);
static_assert(cuda::__argument::__traits<decltype(def)>::lowest == 0);
static_assert(cuda::__argument::__traits<decltype(def)>::max == 100);
static_assert(cuda::__argument::__traits<decltype(def)>::highest == 100);
static_assert(!cuda::__argument::__traits<decltype(def)>::is_single_value);
}

Expand All @@ -87,7 +87,7 @@ TEST_FUNC constexpr bool test()
cuda::std::span<int>{arr, 4}, cuda::__argument::__bounds<1, 4096>(), cuda::__argument::__bounds(5, 100)};
static_assert(cuda::__argument::__traits<decltype(def)>::lowest == 1);
assert(cuda::__argument::__lowest_(def) == 5);
assert(cuda::__argument::__max_(def) == 100);
assert(cuda::__argument::__highest_(def) == 100);
}

// Deferred sequence with both bounds, runtime bounds first
Expand All @@ -96,9 +96,9 @@ TEST_FUNC constexpr bool test()
auto def = cuda::__argument::__deferred_sequence{
cuda::std::span<int>{arr, 4}, cuda::__argument::__bounds(5, 100), cuda::__argument::__bounds<1, 4096>()};
static_assert(cuda::__argument::__traits<decltype(def)>::lowest == 1);
static_assert(cuda::__argument::__traits<decltype(def)>::max == 4096);
static_assert(cuda::__argument::__traits<decltype(def)>::highest == 4096);
assert(cuda::__argument::__lowest_(def) == 5);
assert(cuda::__argument::__max_(def) == 100);
assert(cuda::__argument::__highest_(def) == 100);
}

// Traits: deferred is single value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,19 @@ TEST_FUNC constexpr bool test()
auto da = cuda::__argument::__immediate{5};
assert(cuda::__argument::__unwrap(da) == 5);
static_assert(cuda::__argument::__traits<decltype(da)>::lowest == cuda::std::numeric_limits<int>::lowest());
static_assert(cuda::__argument::__traits<decltype(da)>::max == cuda::std::numeric_limits<int>::max());
static_assert(cuda::__argument::__traits<decltype(da)>::highest == (cuda::std::numeric_limits<int>::max)());
assert(cuda::__argument::__lowest_(da) == 5);
assert(cuda::__argument::__max_(da) == 5);
assert(cuda::__argument::__highest_(da) == 5);
}

// Uniform scalar with static bounds
{
auto da = cuda::__argument::__immediate{5, cuda::__argument::__bounds<1, 8>()};
assert(cuda::__argument::__unwrap(da) == 5);
static_assert(cuda::__argument::__traits<decltype(da)>::lowest == 1);
static_assert(cuda::__argument::__traits<decltype(da)>::max == 8);
static_assert(cuda::__argument::__traits<decltype(da)>::highest == 8);
assert(cuda::__argument::__lowest_(da) == 5);
assert(cuda::__argument::__max_(da) == 5);
assert(cuda::__argument::__highest_(da) == 5);
}

// Non-sequence values are accepted without scalar-only restrictions
Expand All @@ -64,7 +64,7 @@ TEST_FUNC constexpr bool test()
cuda::__argument::__immediate_sequence{cuda::std::span<int>{arr, 4}, cuda::__argument::__bounds(1L, 100L)};
assert(cuda::__argument::__unwrap(da).size() == 4);
assert(cuda::__argument::__lowest_(da) == 1);
assert(cuda::__argument::__max_(da) == 100);
assert(cuda::__argument::__highest_(da) == 100);
}

// Per-segment span with both bounds
Expand All @@ -73,9 +73,9 @@ TEST_FUNC constexpr bool test()
auto da = cuda::__argument::__immediate_sequence{
cuda::std::span<int>{arr, 4}, cuda::__argument::__bounds<1, 256>(), cuda::__argument::__bounds(10, 200)};
static_assert(cuda::__argument::__traits<decltype(da)>::lowest == 1);
static_assert(cuda::__argument::__traits<decltype(da)>::max == 256);
static_assert(cuda::__argument::__traits<decltype(da)>::highest == 256);
assert(cuda::__argument::__lowest_(da) == 10);
assert(cuda::__argument::__max_(da) == 200);
assert(cuda::__argument::__highest_(da) == 200);
}

// Per-segment span with both bounds, runtime bounds first
Expand All @@ -84,9 +84,9 @@ TEST_FUNC constexpr bool test()
auto da = cuda::__argument::__immediate_sequence{
cuda::std::span<int>{arr, 4}, cuda::__argument::__bounds(10, 200), cuda::__argument::__bounds<1, 256>()};
static_assert(cuda::__argument::__traits<decltype(da)>::lowest == 1);
static_assert(cuda::__argument::__traits<decltype(da)>::max == 256);
static_assert(cuda::__argument::__traits<decltype(da)>::highest == 256);
assert(cuda::__argument::__lowest_(da) == 10);
assert(cuda::__argument::__max_(da) == 200);
assert(cuda::__argument::__highest_(da) == 200);
}

// Per-segment via span
Expand All @@ -106,7 +106,7 @@ TEST_FUNC constexpr bool test()
assert(cuda::__argument::__unwrap(da).size() == 4);
assert(cuda::__argument::__unwrap(da)[2] == 30);
static_assert(cuda::__argument::__traits<decltype(da)>::lowest == 1);
static_assert(cuda::__argument::__traits<decltype(da)>::max == 100);
static_assert(cuda::__argument::__traits<decltype(da)>::highest == 100);
}

// Traits
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,15 @@ TEST_FUNC void test()
{
constexpr auto sa = cuda::__argument::__constant<42>{};
static_assert(cuda::__argument::__lowest_(sa) == 42);
static_assert(cuda::__argument::__max_(sa) == 42);
static_assert(cuda::__argument::__highest_(sa) == 42);
}

#if TEST_HAS_CLASS_NTTP
// Bounds: array sequence computes min/max of elements
// Bounds: array sequence computes lowest/highest of elements
{
constexpr auto sa = cuda::__argument::__constant_sequence<cuda::std::array<int, 3>{128, 256, 512}>{};
static_assert(cuda::__argument::__lowest_(sa) == 128);
static_assert(cuda::__argument::__max_(sa) == 512);
static_assert(cuda::__argument::__highest_(sa) == 512);
}
#endif // TEST_HAS_CLASS_NTTP

Expand All @@ -83,7 +83,7 @@ TEST_FUNC void test()
{
constexpr auto sa = cuda::__argument::__constant_sequence<cuda::std::array<int, 0>{}>{};
static_assert(cuda::__argument::__lowest_(sa) == cuda::std::numeric_limits<int>::lowest());
static_assert(cuda::__argument::__max_(sa) == cuda::std::numeric_limits<int>::max());
static_assert(cuda::__argument::__highest_(sa) == (cuda::std::numeric_limits<int>::max)());
}
#endif // TEST_HAS_CLASS_NTTP

Expand All @@ -95,7 +95,7 @@ TEST_FUNC void test()
static_assert(traits::is_single_value);
static_assert(cuda::std::is_same_v<traits::value_type, int>);
static_assert(traits::lowest == 42);
static_assert(traits::max == 42);
static_assert(traits::highest == 42);
}

#if TEST_HAS_CLASS_NTTP
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

using arg_t = cuda::__argument::__immediate<unsigned char, cuda::__argument::__static_bounds<0, 1000>>;

[[maybe_unused]] constexpr auto invalid_max = cuda::__argument::__traits<arg_t>::max;
[[maybe_unused]] constexpr auto invalid_highest = cuda::__argument::__traits<arg_t>::highest;

int main(int, char**)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ enum class algorithm_variant
template <class _SegSizeArg>
TEST_FUNC constexpr algorithm_variant select_variant(_SegSizeArg)
{
if constexpr (cuda::__argument::__traits<_SegSizeArg>::max <= shared_memory_capacity)
if constexpr (cuda::__argument::__traits<_SegSizeArg>::highest <= shared_memory_capacity)
{
return algorithm_variant::shared_memory;
}
Expand All @@ -48,8 +48,8 @@ TEST_FUNC constexpr algorithm_variant select_variant(_SegSizeArg)
template <class _SegSizeArg>
TEST_FUNC constexpr int compute_buffer_size(_SegSizeArg __seg_size, int __num_segments)
{
auto __max = cuda::std::min(default_max_segment_size, static_cast<int>(cuda::__argument::__max_(__seg_size)));
return __max * __num_segments;
auto __highest = cuda::std::min(default_max_segment_size, static_cast<int>(cuda::__argument::__highest_(__seg_size)));
return __highest * __num_segments;
}

// Process: use the actual unwrapped value.
Expand Down Expand Up @@ -100,15 +100,15 @@ TEST_FUNC constexpr bool test()
}

#if TEST_HAS_CLASS_NTTP
// static_argument: array sequence, max fits in shared memory
// static_argument: array sequence, highest fits in shared memory
{
constexpr auto seg_sizes = cuda::__argument::__constant_sequence<cuda::std::array{64, 128, 256}>{};
static_assert(select_variant(seg_sizes) == algorithm_variant::shared_memory);
assert(compute_buffer_size(seg_sizes, 3) == 256 * 3);
assert(process_segments(seg_sizes) == 64 + 128 + 256);
}

// static_argument: array sequence, max exceeds shared memory, buffer clamped
// static_argument: array sequence, highest exceeds shared memory, buffer clamped
{
constexpr auto seg_sizes = cuda::__argument::__constant_sequence<cuda::std::array{64, 128, 512}>{};
static_assert(select_variant(seg_sizes) == algorithm_variant::global_memory);
Expand Down Expand Up @@ -156,7 +156,7 @@ TEST_FUNC constexpr bool test()
int sizes[3] = {64, 128, 96};
auto seg_sizes = cuda::__argument::__immediate_sequence{
cuda::std::span<int>{sizes, 3}, cuda::__argument::__bounds<1, 256>(), cuda::__argument::__bounds(1, 200)};
static_assert(cuda::__argument::__traits<decltype(seg_sizes)>::max <= shared_memory_capacity);
static_assert(cuda::__argument::__traits<decltype(seg_sizes)>::highest <= shared_memory_capacity);
assert(select_variant(seg_sizes) == algorithm_variant::shared_memory);
assert(compute_buffer_size(seg_sizes, 3) == 200 * 3);
assert(process_segments(seg_sizes) == 64 + 128 + 96);
Expand All @@ -167,7 +167,7 @@ TEST_FUNC constexpr bool test()
int val = 100;
auto seg_size = cuda::__argument::__deferred{
cuda::std::span<int, 1>{&val, 1}, cuda::__argument::__bounds<1, 256>(), cuda::__argument::__bounds(1, 200)};
static_assert(cuda::__argument::__traits<decltype(seg_size)>::max <= shared_memory_capacity);
static_assert(cuda::__argument::__traits<decltype(seg_size)>::highest <= shared_memory_capacity);
assert(select_variant(seg_size) == algorithm_variant::shared_memory);
assert(compute_buffer_size(seg_size, 4) == 200 * 4);
}
Expand Down
Loading