-
Notifications
You must be signed in to change notification settings - Fork 401
[libcu++] Make argument namespace and wrappers construction public #9251
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
06c277f
3210e3d
2e084a9
2c740b7
9088870
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,8 +23,8 @@ | |
| #include <cub/device/dispatch/tuning/tuning_batched_topk.cuh> | ||
| #include <cub/util_type.cuh> | ||
|
|
||
| #include <cuda/__argument_> | ||
| #include <cuda/__cmath/ceil_div.h> | ||
| #include <cuda/argument> | ||
|
|
||
| CUB_NAMESPACE_BEGIN | ||
|
|
||
|
|
@@ -73,8 +73,8 @@ struct agent_batched_topk_worker_per_segment | |
| using key_t = it_value_t<key_it_t>; | ||
| using value_t = it_value_t<value_it_t>; | ||
|
|
||
| using segment_size_val_t = typename ::cuda::__argument::__traits<SegmentSizeParameterT>::element_type; | ||
| using num_segments_val_t = typename ::cuda::__argument::__traits<NumSegmentsParameterT>::element_type; | ||
| using segment_size_val_t = typename ::cuda::argument::__traits<SegmentSizeParameterT>::element_type; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this library usable without exposing
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The main user facing part is the construction of the wrappers, while our APIs will use the traits to examine them. Long term we want to expose the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for explaining! That makes sense. |
||
| using num_segments_val_t = typename ::cuda::argument::__traits<NumSegmentsParameterT>::element_type; | ||
| using counters_t = batched_topk_counters<num_segments_val_t>; | ||
|
|
||
| static constexpr auto policy = PolicyGetter{}(); | ||
|
|
@@ -95,7 +95,7 @@ struct agent_batched_topk_worker_per_segment | |
| multi_worker_per_segment_policy.threads_per_block * multi_worker_per_segment_policy.items_per_thread; | ||
|
|
||
| // Check if there could be large segments present | ||
| static constexpr bool only_small_segments = ::cuda::__argument::__traits<SegmentSizeParameterT>::highest <= tile_size; | ||
| static constexpr bool only_small_segments = ::cuda::argument::__traits<SegmentSizeParameterT>::highest <= tile_size; | ||
|
|
||
| // Check if we are dealing with keys-only or key-value pairs | ||
| static constexpr bool is_keys_only = ::cuda::std::is_same_v<value_t, cub::NullType>; | ||
|
|
@@ -196,8 +196,8 @@ struct agent_batched_topk_worker_per_segment | |
| return; | ||
| } | ||
|
|
||
| constexpr bool is_full_tile = ::cuda::__argument::__traits<SegmentSizeParameterT>::is_constant | ||
| && ::cuda::__argument::__traits<SegmentSizeParameterT>::lowest == tile_size; | ||
| constexpr bool is_full_tile = ::cuda::argument::__traits<SegmentSizeParameterT>::is_constant | ||
| && ::cuda::argument::__traits<SegmentSizeParameterT>::lowest == tile_size; | ||
|
|
||
| // Resolve Segment Parameters | ||
| const auto segment_size = params::get_param(segment_sizes, segment_id); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,10 +31,10 @@ | |
|
|
||
| #include <thrust/system/cuda/detail/core/triple_chevron_launch.h> | ||
|
|
||
| #include <cuda/__argument_> | ||
| #include <cuda/__cmath/ceil_div.h> | ||
| #include <cuda/__iterator/counting_iterator.h> | ||
| #include <cuda/__iterator/transform_iterator.h> | ||
| #include <cuda/argument> | ||
| #include <cuda/std/__functional/operations.h> | ||
| #include <cuda/std/__type_traits/is_same.h> | ||
| #include <cuda/std/__type_traits/remove_cv.h> | ||
|
|
@@ -49,9 +49,9 @@ namespace detail::batched_topk | |
| // Internal: wrap user-facing select direction into discrete param for dispatch | ||
| // ----------------------------------------------------------------------------- | ||
|
|
||
| // Uniform (compile-time): __constant<Dir> -> single-option uniform_discrete_param. | ||
| // Uniform (compile-time): constant<Dir> -> single-option uniform_discrete_param. | ||
| template <detail::topk::select Dir> | ||
| [[nodiscard]] _CCCL_HOST_DEVICE auto wrap_select_direction(::cuda::__argument::__constant<Dir>) | ||
| [[nodiscard]] _CCCL_HOST_DEVICE auto wrap_select_direction(::cuda::argument::constant<Dir>) | ||
| { | ||
| return params::uniform_discrete_param<detail::topk::select, Dir>{Dir}; | ||
| } | ||
|
|
@@ -126,7 +126,7 @@ template <typename KeyInputItItT, | |
| typename PolicySelector = policy_selector_from_types<it_value_t<it_value_t<KeyInputItItT>>, | ||
| it_value_t<it_value_t<ValueInputItItT>>, | ||
| ::cuda::std::int64_t, | ||
| ::cuda::__argument::__traits<KParameterT>::highest>> | ||
| ::cuda::argument::__traits<KParameterT>::highest>> | ||
| #if _CCCL_HAS_CONCEPTS() | ||
| requires batched_topk_policy_selector<PolicySelector> | ||
| #endif // _CCCL_HAS_CONCEPTS() | ||
|
|
@@ -145,7 +145,7 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t dispatch( | |
| cudaStream_t stream = nullptr, | ||
| [[maybe_unused]] PolicySelector policy_selector = {}) | ||
| { | ||
| using large_segment_tile_offset_t = typename ::cuda::__argument::__traits<TotalNumItemsGuaranteeT>::element_type; | ||
| using large_segment_tile_offset_t = typename ::cuda::argument::__traits<TotalNumItemsGuaranteeT>::element_type; | ||
|
|
||
| // Wrap the raw enum into the internal discrete param type | ||
| auto select_directions = wrap_select_direction(select_direction); | ||
|
|
@@ -171,9 +171,9 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t dispatch( | |
| static constexpr int worker_per_segment_tile_size = | ||
| worker_per_segment_policy.threads_per_block * worker_per_segment_policy.items_per_thread; | ||
| static constexpr bool any_small_segments = | ||
| ::cuda::__argument::__traits<SegmentSizeParameterT>::lowest <= worker_per_segment_tile_size; | ||
| ::cuda::argument::__traits<SegmentSizeParameterT>::lowest <= worker_per_segment_tile_size; | ||
| static constexpr bool only_small_segments = | ||
| ::cuda::__argument::__traits<SegmentSizeParameterT>::highest <= worker_per_segment_tile_size; | ||
| ::cuda::argument::__traits<SegmentSizeParameterT>::highest <= worker_per_segment_tile_size; | ||
|
|
||
| // Allocation layout: | ||
| // only_small_segments: [0] dummy. | ||
|
|
@@ -183,7 +183,7 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t dispatch( | |
| static constexpr int allocations_array_size = only_small_segments ? 1 : (any_small_segments ? 3 : 2); | ||
| size_t allocation_sizes[allocations_array_size] = {1}; | ||
|
|
||
| using num_segments_val_t = typename ::cuda::__argument::__traits<NumSegmentsParameterT>::element_type; | ||
| using num_segments_val_t = typename ::cuda::argument::__traits<NumSegmentsParameterT>::element_type; | ||
| using counters_t = batched_topk_counters<num_segments_val_t>; | ||
| using segment_size_scan_offset_t = detail::choose_offset_t<num_segments_val_t>; | ||
| using segment_size_scan_input_op_t = | ||
|
|
@@ -239,7 +239,7 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t dispatch( | |
|
|
||
| // TODO (elstehle): support number of segments provided by device-accessible iterator | ||
| // Only uniform number of segments are supported (i.e., we need to resolve the number of segments on the host) | ||
| static_assert(::cuda::__argument::__traits<NumSegmentsParameterT>::is_single_value, | ||
| static_assert(::cuda::argument::__traits<NumSegmentsParameterT>::is_single_value, | ||
| "Only uniform segment sizes are currently supported."); | ||
|
Comment on lines
+242
to
243
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion: Fix the static_assert message to match the checked condition. |
||
|
|
||
| if constexpr (any_small_segments) | ||
|
|
@@ -341,7 +341,7 @@ template <typename KeyInputItItT, | |
| policy_selector_from_types<it_value_t<it_value_t<KeyInputItItT>>, | ||
| it_value_t<it_value_t<ValueInputItItT>>, | ||
| ::cuda::std::int64_t, | ||
| ::cuda::__argument::__traits<KParameterT>::highest>; | ||
| ::cuda::argument::__traits<KParameterT>::highest>; | ||
| return detail::dispatch_with_env_and_tuning<default_policy_selector>( | ||
| env, [&](auto policy_selector, void* d_temp_storage, size_t& temp_storage_bytes, cudaStream_t stream) { | ||
| return dispatch( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
C++26 has
constant_wrapper. It's hard to back-port fully to earlier C++ versions (though there are proposals pending review that might fix this), but for now, it's how C++ spells "compile-time constant." Does thiscuda::argument::constantdo the same thing? If so, then should we consider just making this an alias toconstant_wrapper(or a hypotheticalcuda::std::constant_wrapper)?