From a4f9728ab606e7dbfb5f1dc6ca108404eccea4cf Mon Sep 17 00:00:00 2001 From: Alexander Taepper Date: Wed, 24 Jun 2026 16:27:54 +0200 Subject: [PATCH 01/12] replace PartitionNulls by PartitionNullsAndNans --- .../compute/kernels/vector_array_sort.cc | 42 +++++++++++------- .../arrow/compute/kernels/vector_select_k.cc | 23 ---------- .../compute/kernels/vector_sort_internal.h | 44 ++++++++----------- 3 files changed, 43 insertions(+), 66 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_array_sort.cc b/cpp/src/arrow/compute/kernels/vector_array_sort.cc index 6e7068f6ecf6..4e9c17a110cb 100644 --- a/cpp/src/arrow/compute/kernels/vector_array_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_array_sort.cc @@ -71,22 +71,25 @@ struct PartitionNthToIndices { return Status::IndexError("NthToIndices index out of bound"); } ArrayData* out_arr = out->array_data().get(); - uint64_t* out_begin = out_arr->GetMutableValues(1); - uint64_t* out_end = out_begin + arr.length(); - std::iota(out_begin, out_end, 0); + std::span out_span{out_arr->GetMutableValues(1), + static_cast(arr.length())}; + std::iota(out_span.begin(), out_span.end(), 0); if (pivot == arr.length()) { return Status::OK(); } - const auto p = PartitionNulls( - out_begin, out_end, arr, 0, options.null_placement); - auto nth_begin = out_begin + pivot; - if (nth_begin >= p.non_nulls_begin && nth_begin < p.non_nulls_end) { - std::nth_element(p.non_nulls_begin, nth_begin, p.non_nulls_end, - [&arr](uint64_t left, uint64_t right) { - const auto lval = GetView::LogicalValue(arr.GetView(left)); - const auto rval = GetView::LogicalValue(arr.GetView(right)); - return lval < rval; - }); + const auto p = PartitionNullsAndNans( + out_span.data(), out_span.data() + out_span.size(), arr, 0, + options.null_placement); + auto nth_begin = out_span.begin() + pivot; + if (nth_begin >= p.non_null_like_range.begin() && + nth_begin < p.non_null_like_range.end()) { + std::ranges::nth_element( + p.non_null_like_range.begin(), nth_begin, p.non_null_like_range.end(), + [&arr](uint64_t left, uint64_t right) { + const auto lval = GetView::LogicalValue(arr.GetView(left)); + const auto rval = GetView::LogicalValue(arr.GetView(right)); + return lval < rval; + }); } return Status::OK(); } @@ -150,11 +153,11 @@ class ArrayCompareSorter { const ArraySortOptions& options, ExecContext*) { const auto& values = checked_cast(array); - const auto p = PartitionNulls( + const auto p = PartitionNullsAndNans( indices_begin, indices_end, values, offset, options.null_placement); if (options.order == SortOrder::Ascending) { std::stable_sort( - p.non_nulls_begin, p.non_nulls_end, + p.non_null_like_range.begin(), p.non_null_like_range.end(), [&values, &offset](uint64_t left, uint64_t right) { const auto lhs = GetView::LogicalValue(values.GetView(left - offset)); const auto rhs = GetView::LogicalValue(values.GetView(right - offset)); @@ -162,7 +165,7 @@ class ArrayCompareSorter { }); } else { std::stable_sort( - p.non_nulls_begin, p.non_nulls_end, + p.non_null_like_range.begin(), p.non_null_like_range.end(), [&values, &offset](uint64_t left, uint64_t right) { const auto lhs = GetView::LogicalValue(values.GetView(left - offset)); const auto rhs = GetView::LogicalValue(values.GetView(right - offset)); @@ -171,7 +174,12 @@ class ArrayCompareSorter { return rhs < lhs; }); } - return p; + return NullPartitionResult{ + p.non_null_like_range.data(), + p.non_null_like_range.data() + p.non_null_like_range.size(), + std::min(p.null_range.data(), p.nan_range.data()), + std::max(p.null_range.data() + p.null_range.size(), + p.nan_range.data() + p.nan_range.size())}; } }; diff --git a/cpp/src/arrow/compute/kernels/vector_select_k.cc b/cpp/src/arrow/compute/kernels/vector_select_k.cc index cc375919658c..4cba5db58eae 100644 --- a/cpp/src/arrow/compute/kernels/vector_select_k.cc +++ b/cpp/src/arrow/compute/kernels/vector_select_k.cc @@ -166,29 +166,6 @@ void HeapSortNonNullsToOutput(std::span non_null_input_range, } } -struct PartitionResultByNullLikeness { - std::span non_null_like_range; - std::span null_range; - std::span nan_range; -}; - -template -PartitionResultByNullLikeness PartitionNullsAndNans(uint64_t* indices_begin, - uint64_t* indices_end, - const ArrayType& values, - int64_t offset, - NullPlacement null_placement) { - // Partition nulls at start (resp. end), and null-like values just before (resp. after) - NullPartitionResult p = PartitionNullsOnly(indices_begin, indices_end, - values, offset, null_placement); - NullPartitionResult q = PartitionNullLikes( - p.non_nulls_begin, p.non_nulls_end, values, offset, null_placement); - return PartitionResultByNullLikeness{ - .non_null_like_range = {q.non_nulls_begin, q.non_nulls_end}, - .null_range = {p.nulls_begin, p.nulls_end}, - .nan_range = {q.nulls_begin, q.nulls_end}}; -} - class ArraySelector : public TypeVisitor { public: ArraySelector(ExecContext* ctx, const Array& array, const SelectKOptions& options, diff --git a/cpp/src/arrow/compute/kernels/vector_sort_internal.h b/cpp/src/arrow/compute/kernels/vector_sort_internal.h index 06d911b2c0c8..282404b1e013 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort_internal.h +++ b/cpp/src/arrow/compute/kernels/vector_sort_internal.h @@ -224,23 +224,6 @@ NullPartitionResult PartitionNullLikes(uint64_t* indices_begin, uint64_t* indice } } -// Move nulls to end of array. -// -// `offset` is used when this is called on a chunk of a chunked array -template -NullPartitionResult PartitionNulls(uint64_t* indices_begin, uint64_t* indices_end, - const ArrayType& values, int64_t offset, - NullPlacement null_placement) { - // Partition nulls at start (resp. end), and null-like values just before (resp. after) - NullPartitionResult p = PartitionNullsOnly(indices_begin, indices_end, - values, offset, null_placement); - NullPartitionResult q = PartitionNullLikes( - p.non_nulls_begin, p.non_nulls_end, values, offset, null_placement); - return NullPartitionResult{q.non_nulls_begin, q.non_nulls_end, - std::min(q.nulls_begin, p.nulls_begin), - std::max(q.nulls_end, p.nulls_end)}; -} - // // Null partitioning on chunked arrays, in two flavors: // 1) with uint64_t indices and ChunkedArrayResolver @@ -327,18 +310,27 @@ NullPartitionResult PartitionNullLikes(uint64_t* indices_begin, uint64_t* indice } } +struct PartitionResultByNullLikeness { + std::span non_null_like_range; + std::span null_range; + std::span nan_range; +}; + template -NullPartitionResult PartitionNulls(uint64_t* indices_begin, uint64_t* indices_end, - const ChunkedArrayResolver& resolver, - int64_t null_count, NullPlacement null_placement) { +PartitionResultByNullLikeness PartitionNullsAndNans(uint64_t* indices_begin, + uint64_t* indices_end, + const ArrayType& values, + int64_t offset, + NullPlacement null_placement) { // Partition nulls at start (resp. end), and null-like values just before (resp. after) - NullPartitionResult p = PartitionNullsOnly( - indices_begin, indices_end, resolver, null_count, null_placement); + NullPartitionResult p = PartitionNullsOnly(indices_begin, indices_end, + values, offset, null_placement); NullPartitionResult q = PartitionNullLikes( - p.non_nulls_begin, p.non_nulls_end, resolver, null_placement); - return NullPartitionResult{q.non_nulls_begin, q.non_nulls_end, - std::min(q.nulls_begin, p.nulls_begin), - std::max(q.nulls_end, p.nulls_end)}; + p.non_nulls_begin, p.non_nulls_end, values, offset, null_placement); + return PartitionResultByNullLikeness{ + .non_null_like_range = {q.non_nulls_begin, q.non_nulls_end}, + .null_range = {p.nulls_begin, p.nulls_end}, + .nan_range = {q.nulls_begin, q.nulls_end}}; } template From 5fe6eb847167fbec48dcd89d1d70729489bb2359 Mon Sep 17 00:00:00 2001 From: Alexander Taepper Date: Wed, 24 Jun 2026 18:46:20 +0200 Subject: [PATCH 02/12] more helpers and migrations --- .../compute/kernels/vector_array_sort.cc | 7 +-- .../compute/kernels/vector_sort_internal.h | 49 ++++++++++++++++--- 2 files changed, 42 insertions(+), 14 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_array_sort.cc b/cpp/src/arrow/compute/kernels/vector_array_sort.cc index 4e9c17a110cb..b0020b608d08 100644 --- a/cpp/src/arrow/compute/kernels/vector_array_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_array_sort.cc @@ -174,12 +174,7 @@ class ArrayCompareSorter { return rhs < lhs; }); } - return NullPartitionResult{ - p.non_null_like_range.data(), - p.non_null_like_range.data() + p.non_null_like_range.size(), - std::min(p.null_range.data(), p.nan_range.data()), - std::max(p.null_range.data() + p.null_range.size(), - p.nan_range.data() + p.nan_range.size())}; + return p.toLegacyNullPartitionResult(); } }; diff --git a/cpp/src/arrow/compute/kernels/vector_sort_internal.h b/cpp/src/arrow/compute/kernels/vector_sort_internal.h index 282404b1e013..671991d1d3c3 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort_internal.h +++ b/cpp/src/arrow/compute/kernels/vector_sort_internal.h @@ -25,6 +25,7 @@ #include "arrow/array.h" #include "arrow/compute/api_vector.h" +#include "arrow/compute/kernel.h" #include "arrow/compute/kernels/chunked_internal.h" #include "arrow/table.h" #include "arrow/type.h" @@ -167,10 +168,49 @@ struct GenericNullPartitionResult { }; } }; - using NullPartitionResult = GenericNullPartitionResult; using ChunkedNullPartitionResult = GenericNullPartitionResult; +template +struct GenericPartitionResultByNullLikeness { + std::span non_null_like_range; + std::span null_range; + std::span nan_range; + + IndexType* overall_begin() const { + return std::min(non_null_like_range.begin(), null_range.begin(), nan_range.begin()); + } + + IndexType* overall_end() const { + return std::max(non_null_like_range.end(), null_range.end(), nan_range.end()); + } + + GenericNullPartitionResult toLegacyNullPartitionResult() const { + return GenericNullPartitionResult{ + non_null_like_range.data(), + non_null_like_range.data() + non_null_like_range.size(), + std::min(null_range.data(), nan_range.data()), + std::max(null_range.data() + null_range.size(), + nan_range.data() + nan_range.size())}; + } + + template + GenericPartitionResultByNullLikeness TranslateTo( + IndexType* indices_begin, TargetIndexType* target_indices_begin) const { + return {.non_null_like_range = {(non_null_like_range.data() - indices_begin) + + target_indices_begin, + non_null_like_range.size()}, + .null_range = {(null_range.data() - indices_begin) + target_indices_begin, + null_range.size()}, + .nan_range = {(nan_range.data() - indices_begin) + target_indices_begin, + nan_range.size()}}; + } +}; + +using PartitionResultByNullLikeness = GenericPartitionResultByNullLikeness; +using ChunkedPartitionResultByNullLikeness = + GenericPartitionResultByNullLikeness; + // Move nulls (not null-like values) to end of array. // // `offset` is used when this is called on a chunk of a chunked array @@ -310,12 +350,6 @@ NullPartitionResult PartitionNullLikes(uint64_t* indices_begin, uint64_t* indice } } -struct PartitionResultByNullLikeness { - std::span non_null_like_range; - std::span null_range; - std::span nan_range; -}; - template PartitionResultByNullLikeness PartitionNullsAndNans(uint64_t* indices_begin, uint64_t* indices_end, @@ -445,7 +479,6 @@ struct GenericMergeImpl { IndexType* temp_indices_ = nullptr; }; -using MergeImpl = GenericMergeImpl; using ChunkedMergeImpl = GenericMergeImpl; From 6225331d41bab62a270d191128a417db2b3e4c79 Mon Sep 17 00:00:00 2001 From: Alexander Taepper Date: Wed, 24 Jun 2026 20:44:42 +0200 Subject: [PATCH 03/12] spanify one --- .../compute/kernels/vector_array_sort.cc | 105 +++++++++--------- 1 file changed, 52 insertions(+), 53 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_array_sort.cc b/cpp/src/arrow/compute/kernels/vector_array_sort.cc index b0020b608d08..d7d3d30df8ae 100644 --- a/cpp/src/arrow/compute/kernels/vector_array_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_array_sort.cc @@ -148,13 +148,14 @@ class ArrayCompareSorter { using GetView = GetViewType; public: - Result operator()(uint64_t* indices_begin, uint64_t* indices_end, - const Array& array, int64_t offset, - const ArraySortOptions& options, ExecContext*) { + Result operator()(std::span indices, const Array& array, + int64_t offset, const ArraySortOptions& options, + ExecContext*) { const auto& values = checked_cast(array); const auto p = PartitionNullsAndNans( - indices_begin, indices_end, values, offset, options.null_placement); + indices.data(), indices.data() + indices.size(), values, offset, + options.null_placement); if (options.order == SortOrder::Ascending) { std::stable_sort( p.non_null_like_range.begin(), p.non_null_like_range.end(), @@ -181,9 +182,8 @@ class ArrayCompareSorter { template <> class ArrayCompareSorter { public: - Result operator()(uint64_t* indices_begin, uint64_t* indices_end, - const Array& array, int64_t offset, - const ArraySortOptions& options, + Result operator()(std::span indices, const Array& array, + int64_t offset, const ArraySortOptions& options, ExecContext* ctx) { const auto& dict_array = checked_cast(array); const auto& dict_values = dict_array.dictionary(); @@ -223,7 +223,7 @@ class ArrayCompareSorter { DCHECK_EQ(decoded_ranks->length(), dict_array.length()); ARROW_ASSIGN_OR_RAISE(auto rank_sorter, GetArraySorter(*decoded_ranks->type())); - return rank_sorter(indices_begin, indices_end, *decoded_ranks, offset, options, ctx); + return rank_sorter(indices, *decoded_ranks, offset, options, ctx); } private: @@ -267,12 +267,11 @@ class ArrayCompareSorter { template <> class ArrayCompareSorter { public: - Result operator()(uint64_t* indices_begin, uint64_t* indices_end, - const Array& array, int64_t offset, - const ArraySortOptions& options, + Result operator()(std::span indices, const Array& array, + int64_t offset, const ArraySortOptions& options, ExecContext* ctx) { const auto& struct_array = checked_cast(array); - return SortStructArray(ctx, indices_begin, indices_end, struct_array, options.order, + return SortStructArray(ctx, indices, struct_array, options.order, options.null_placement); } }; @@ -293,17 +292,16 @@ class ArrayCountSorter { value_range_ = static_cast(max - min) + 1; } - Result operator()(uint64_t* indices_begin, uint64_t* indices_end, - const Array& array, int64_t offset, - const ArraySortOptions& options, + Result operator()(std::span indices, const Array& array, + int64_t offset, const ArraySortOptions& options, ExecContext*) const { const auto& values = checked_cast(array); // 32bit counter performs much better than 64bit one if (values.length() < (1LL << 32)) { - return SortInternal(indices_begin, indices_end, values, offset, options); + return SortInternal(indices, values, offset, options); } else { - return SortInternal(indices_begin, indices_end, values, offset, options); + return SortInternal(indices, values, offset, options); } } @@ -312,8 +310,8 @@ class ArrayCountSorter { uint32_t value_range_{0}; template - NullPartitionResult SortInternal(uint64_t* indices_begin, uint64_t* indices_end, - const ArrayType& values, int64_t offset, + NullPartitionResult SortInternal(std::span indices, const ArrayType& values, + int64_t offset, const ArraySortOptions& options) const { const uint32_t value_range = value_range_; @@ -329,11 +327,13 @@ class ArrayCountSorter { } if (options.null_placement == NullPlacement::AtStart) { - p = NullPartitionResult::NullsAtStart(indices_begin, indices_end, - indices_end - counts[value_range]); + p = NullPartitionResult::NullsAtStart( + indices.data(), indices.data() + indices.size(), + indices.data() + indices.size() - counts[value_range]); } else { - p = NullPartitionResult::NullsAtEnd(indices_begin, indices_end, - indices_begin + counts[value_range]); + p = NullPartitionResult::NullsAtEnd(indices.data(), + indices.data() + indices.size(), + indices.data() + counts[value_range]); } EmitIndices(p, values, offset, &counts[0]); } else { @@ -344,11 +344,12 @@ class ArrayCountSorter { } if (options.null_placement == NullPlacement::AtStart) { - p = NullPartitionResult::NullsAtStart(indices_begin, indices_end, - indices_end - counts[0]); + p = NullPartitionResult::NullsAtStart( + indices.data(), indices.data() + indices.size(), + indices.data() + indices.size() - counts[0]); } else { - p = NullPartitionResult::NullsAtEnd(indices_begin, indices_end, - indices_begin + counts[0]); + p = NullPartitionResult::NullsAtEnd( + indices.data(), indices.data() + indices.size(), indices.data() + counts[0]); } EmitIndices(p, values, offset, &counts[1]); } @@ -378,9 +379,9 @@ class ArrayCountSorter { public: ArrayCountSorter() = default; - Result operator()(uint64_t* indices_begin, uint64_t* indices_end, - const Array& array, int64_t offset, - const ArraySortOptions& options, ExecContext*) { + Result operator()(std::span indices, const Array& array, + int64_t offset, const ArraySortOptions& options, + ExecContext*) { const auto& values = checked_cast(array); std::array counts{0, 0, 0}; // false, true, null @@ -391,11 +392,11 @@ class ArrayCountSorter { NullPartitionResult p; if (options.null_placement == NullPlacement::AtStart) { - p = NullPartitionResult::NullsAtStart(indices_begin, indices_end, - indices_begin + nulls); + p = NullPartitionResult::NullsAtStart( + indices.data(), indices.data() + indices.size(), indices.data() + nulls); } else { - p = NullPartitionResult::NullsAtEnd(indices_begin, indices_end, - indices_end - nulls); + p = NullPartitionResult::NullsAtEnd(indices.data(), indices.data() + indices.size(), + indices.data() + indices.size() - nulls); } if (options.order == SortOrder::Ascending) { @@ -423,9 +424,8 @@ class ArrayCountOrCompareSorter { using c_type = typename ArrowType::c_type; public: - Result operator()(uint64_t* indices_begin, uint64_t* indices_end, - const Array& array, int64_t offset, - const ArraySortOptions& options, + Result operator()(std::span indices, const Array& array, + int64_t offset, const ArraySortOptions& options, ExecContext* ctx) { const auto& values = checked_cast(array); @@ -438,11 +438,11 @@ class ArrayCountOrCompareSorter { if (static_cast(max) - static_cast(min) <= countsort_max_range_) { count_sorter_.SetMinMax(min, max); - return count_sorter_(indices_begin, indices_end, values, offset, options, ctx); + return count_sorter_(indices, values, offset, options, ctx); } } - return compare_sorter_(indices_begin, indices_end, values, offset, options, ctx); + return compare_sorter_(indices, values, offset, options, ctx); } private: @@ -464,10 +464,10 @@ class ArrayCountOrCompareSorter { class ArrayNullSorter { public: - Result operator()(uint64_t* indices_begin, uint64_t* indices_end, - const Array& values, int64_t offset, - const ArraySortOptions& options, ExecContext*) { - return NullPartitionResult::NullsOnly(indices_begin, indices_end, + Result operator()(std::span indices, const Array& values, + int64_t offset, const ArraySortOptions& options, + ExecContext*) { + return NullPartitionResult::NullsOnly(indices.data(), indices.data() + indices.size(), options.null_placement); } }; @@ -545,14 +545,14 @@ struct ArraySortIndices { static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { const auto& options = ArraySortIndicesState::Get(ctx); ArrayData* out_arr = out->array_data().get(); - uint64_t* out_begin = out_arr->GetMutableValues(1); - uint64_t* out_end = out_begin + out_arr->length; - std::iota(out_begin, out_end, 0); + std::span out_span{out_arr->GetMutableValues(1), + static_cast(out_arr->length)}; + std::iota(out_span.begin(), out_span.end(), 0); ArrayType arr(batch[0].array.ToArrayData()); ARROW_ASSIGN_OR_RAISE(auto sorter, GetArraySorter(*GetPhysicalType(arr.type()))); - return sorter(out_begin, out_end, arr, 0, options, ctx->exec_context()).status(); + return sorter(out_span, arr, 0, options, ctx->exec_context()).status(); } }; @@ -560,12 +560,11 @@ Status ArraySortIndicesChunked(KernelContext* ctx, const ExecBatch& batch, Datum const auto& options = ArraySortIndicesState::Get(ctx); ArrayData* out_arr = out->mutable_array(); DCHECK_EQ(out_arr->length, batch.length); - uint64_t* out_begin = out_arr->GetMutableValues(1); - uint64_t* out_end = out_begin + out_arr->length; - std::iota(out_begin, out_end, 0); - return SortChunkedArray(ctx->exec_context(), out_begin, out_end, - *batch[0].chunked_array(), options.order, - options.null_placement) + std::span out_span{out_arr->GetMutableValues(1), + static_cast(out_arr->length)}; + std::iota(out_span.begin(), out_span.end(), 0); + return SortChunkedArray(ctx->exec_context(), out_span, *batch[0].chunked_array(), + options.order, options.null_placement) .status(); } From 650d857e2b2ecec0ea1a34f6055cfc0dcddba929 Mon Sep 17 00:00:00 2001 From: Alexander Taepper Date: Wed, 24 Jun 2026 20:46:56 +0200 Subject: [PATCH 04/12] spanify another --- cpp/src/arrow/compute/kernels/vector_sort.cc | 146 +++++++++---------- 1 file changed, 68 insertions(+), 78 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc index 06eed160233e..3a03bbb6830b 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_sort.cc @@ -45,13 +45,12 @@ Result BatchesFromTable(const Table& table) { // then merging the sorted chunks recursively. class ChunkedArraySorter : public TypeVisitor { public: - ChunkedArraySorter(ExecContext* ctx, uint64_t* indices_begin, uint64_t* indices_end, + ChunkedArraySorter(ExecContext* ctx, std::span indices, const std::shared_ptr& physical_type, const ArrayVector& physical_chunks, const SortOrder order, const NullPlacement null_placement, NullPartitionResult* output) : TypeVisitor(), - indices_begin_(indices_begin), - indices_end_(indices_end), + indices_(indices), physical_type_(physical_type), physical_chunks_(physical_chunks), order_(order), @@ -72,7 +71,7 @@ class ChunkedArraySorter : public TypeVisitor { #undef VISIT Status Visit(const NullType&) override { - std::iota(indices_begin_, indices_end_, 0); + std::iota(indices_.begin(), indices_.end(), 0); return Status::OK(); } @@ -83,10 +82,13 @@ class ChunkedArraySorter : public TypeVisitor { ArraySortOptions options(order_, null_placement_); const auto num_chunks = static_cast(physical_chunks_.size()); if (num_chunks == 0) { - *output_ = {indices_end_, indices_end_, indices_end_, indices_end_}; + *output_ = {.non_nulls_begin = indices_.data() + indices_.size(), + .non_nulls_end = indices_.data() + indices_.size(), + .nulls_begin = indices_.data() + indices_.size(), + .nulls_end = indices_.data() + indices_.size()}; return Status::OK(); } - const int64_t num_indices = static_cast(indices_end_ - indices_begin_); + const int64_t num_indices = static_cast(indices_.size()); const auto arrays = GetArrayPointers(physical_chunks_); // Sort each chunk independently and merge to sorted indices. @@ -101,23 +103,22 @@ class ChunkedArraySorter : public TypeVisitor { const auto array = checked_cast(arrays[i]); end_offset += array->length(); null_count += array->null_count(); - ARROW_ASSIGN_OR_RAISE(sorted[i], array_sorter_(indices_begin_ + begin_offset, - indices_begin_ + end_offset, *array, - begin_offset, options, ctx_)); + ARROW_ASSIGN_OR_RAISE(sorted[i], + array_sorter_(indices_.data(), indices_.data() + indices_.size(), *array, begin_offset, options, ctx_)); begin_offset = end_offset; } DCHECK_EQ(end_offset, num_indices); // Then merge them by pairs, recursively if (sorted.size() > 1) { - ChunkedIndexMapper chunked_mapper(arrays, indices_begin_, indices_end_); + ChunkedIndexMapper chunked_mapper(arrays, indices_.data(), indices_.data() + indices_.size()); ARROW_ASSIGN_OR_RAISE(auto chunked_indices_pair, chunked_mapper.LogicalToPhysical()); auto [chunked_indices_begin, chunked_indices_end] = chunked_indices_pair; std::vector chunk_sorted(num_chunks); for (int i = 0; i < num_chunks; ++i) { - chunk_sorted[i] = sorted[i].TranslateTo(indices_begin_, chunked_indices_begin); + chunk_sorted[i] = sorted[i].TranslateTo(indices_.data(), chunked_indices_begin); } auto merge_nulls = [&](CompressedChunkLocation* nulls_begin, @@ -160,14 +161,14 @@ class ChunkedArraySorter : public TypeVisitor { // Reverse everything sorted.resize(1); - sorted[0] = chunk_sorted[0].TranslateTo(chunked_indices_begin, indices_begin_); + sorted[0] = chunk_sorted[0].TranslateTo(chunked_indices_begin, indices_.data()); RETURN_NOT_OK(chunked_mapper.PhysicalToLogical()); } DCHECK_EQ(sorted.size(), 1); - DCHECK_EQ(sorted[0].overall_begin(), indices_begin_); - DCHECK_EQ(sorted[0].overall_end(), indices_end_); + DCHECK_EQ(sorted[0].overall_begin(), indices_.data()); + DCHECK_EQ(sorted[0].overall_end(), indices_.data() + indices_.size()); // Note that "nulls" can also include NaNs, hence the >= check DCHECK_GE(sorted[0].null_count(), null_count); @@ -211,8 +212,7 @@ class ChunkedArraySorter : public TypeVisitor { .template Value(); } - uint64_t* indices_begin_; - uint64_t* indices_end_; + std::span indices_; const std::shared_ptr& physical_type_; const ArrayVector& physical_chunks_; const SortOrder order_; @@ -395,17 +395,14 @@ class RadixRecordBatchSorter { public: using ResolvedSortKey = ResolvedRecordBatchSortKey; - RadixRecordBatchSorter(uint64_t* indices_begin, uint64_t* indices_end, + RadixRecordBatchSorter(std::span indices, std::vector sort_keys) - : sort_keys_(std::move(sort_keys)), - indices_begin_(indices_begin), - indices_end_(indices_end) {} + : sort_keys_(std::move(sort_keys)), indices_(indices) {} - RadixRecordBatchSorter(uint64_t* indices_begin, uint64_t* indices_end, - const RecordBatch& batch, const SortOptions& options) + RadixRecordBatchSorter(std::span indices, const RecordBatch& batch, + const SortOptions& options) : sort_keys_(ResolveRecordBatchSortKeys(batch, options.GetSortKeys(), &status_)), - indices_begin_(indices_begin), - indices_end_(indices_end) {} + indices_(indices) {} // Offset is for table sorting Result Sort(int64_t offset = 0) { @@ -421,7 +418,8 @@ class RadixRecordBatchSorter { } // Sort from left to right - return column_sorts.front()->SortRange(indices_begin_, indices_end_, offset); + return column_sorts.front()->SortRange(indices_.data(), + indices_.data() + indices_.size(), offset); } protected: @@ -474,8 +472,7 @@ class RadixRecordBatchSorter { } const std::vector sort_keys_; - uint64_t* indices_begin_; - uint64_t* indices_end_; + std::span indices_; Status status_; }; @@ -484,17 +481,13 @@ class MultipleKeyRecordBatchSorter : public TypeVisitor { public: using ResolvedSortKey = ResolvedRecordBatchSortKey; - MultipleKeyRecordBatchSorter(uint64_t* indices_begin, uint64_t* indices_end, + MultipleKeyRecordBatchSorter(std::span indices, std::vector sort_keys) - : indices_begin_(indices_begin), - indices_end_(indices_end), - sort_keys_(std::move(sort_keys)), - comparator_(sort_keys_) {} + : indices_(indices), sort_keys_(std::move(sort_keys)), comparator_(sort_keys_) {} - MultipleKeyRecordBatchSorter(uint64_t* indices_begin, uint64_t* indices_end, - const RecordBatch& batch, const SortOptions& options) - : indices_begin_(indices_begin), - indices_end_(indices_end), + MultipleKeyRecordBatchSorter(std::span indices, const RecordBatch& batch, + const SortOptions& options) + : indices_(indices), sort_keys_(ResolveSortKeys(batch, options.GetSortKeys(), &status_)), comparator_(sort_keys_) {} @@ -564,7 +557,7 @@ class MultipleKeyRecordBatchSorter : public TypeVisitor { template enable_if_null SortInternal() { - std::stable_sort(indices_begin_, indices_end_, [&](uint64_t left, uint64_t right) { + std::ranges::stable_sort(indices_, [&](uint64_t left, uint64_t right) { return comparator_.Compare(left, right, 1); }); return comparator_.status(); @@ -578,7 +571,8 @@ class MultipleKeyRecordBatchSorter : public TypeVisitor { ::arrow::internal::checked_cast(first_sort_key.array); const auto p = PartitionNullsOnly( - indices_begin_, indices_end_, array, 0, first_sort_key.null_placement); + indices_.data(), indices_.data() + indices_.size(), array, 0, + first_sort_key.null_placement); const auto q = PartitionNullLikes( p.non_nulls_begin, p.non_nulls_end, array, 0, first_sort_key.null_placement); @@ -604,8 +598,7 @@ class MultipleKeyRecordBatchSorter : public TypeVisitor { return q; } - uint64_t* indices_begin_; - uint64_t* indices_end_; + std::span indices_; Status status_; std::vector sort_keys_; Comparator comparator_; @@ -625,15 +618,14 @@ class TableSorter { using Comparator = MultipleKeyComparator; public: - TableSorter(ExecContext* ctx, uint64_t* indices_begin, uint64_t* indices_end, - const Table& table, const SortOptions& options) + TableSorter(ExecContext* ctx, std::span indices, const Table& table, + const SortOptions& options) : ctx_(ctx), table_(table), batches_(MakeBatches(table, &status_)), options_(options), sort_keys_(ResolveSortKeys(table, batches_, options.GetSortKeys(), &status_)), - indices_begin_(indices_begin), - indices_end_(indices_end), + indices_(indices), comparator_(sort_keys_) {} // This is optimized for null partitioning and merging along the first sort key. @@ -679,28 +671,29 @@ class TableSorter { for (int64_t i = 0; i < num_batches; ++i) { const auto& batch = *batches_[i]; end_offset += batch.num_rows(); - RadixRecordBatchSorter sorter(indices_begin_ + begin_offset, - indices_begin_ + end_offset, batch, options_); + RadixRecordBatchSorter sorter( + {indices_.data() + begin_offset, indices_.data() + end_offset}, batch, + options_); ARROW_ASSIGN_OR_RAISE(sorted[i], sorter.Sort(begin_offset)); - DCHECK_EQ(sorted[i].overall_begin(), indices_begin_ + begin_offset); - DCHECK_EQ(sorted[i].overall_end(), indices_begin_ + end_offset); + DCHECK_EQ(sorted[i].overall_begin(), indices_.data() + begin_offset); + DCHECK_EQ(sorted[i].overall_end(), indices_.data() + end_offset); DCHECK_EQ(sorted[i].non_null_count() + sorted[i].null_count(), batch.num_rows()); begin_offset = end_offset; // XXX this is an upper bound on the true null count null_count += sorted[i].null_count(); } - DCHECK_EQ(end_offset, indices_end_ - indices_begin_); + DCHECK_EQ(end_offset, static_cast(indices_.size())); // Then merge them by pairs, recursively if (sorted.size() > 1) { - ChunkedIndexMapper chunked_mapper(batches_, indices_begin_, indices_end_); + ChunkedIndexMapper chunked_mapper(batches_, indices_.data(), indices_.data() + indices_.size()); ARROW_ASSIGN_OR_RAISE(auto chunked_indices_pair, chunked_mapper.LogicalToPhysical()); auto [chunked_indices_begin, chunked_indices_end] = chunked_indices_pair; std::vector chunk_sorted(num_batches); for (int64_t i = 0; i < num_batches; ++i) { - chunk_sorted[i] = sorted[i].TranslateTo(indices_begin_, chunked_indices_begin); + chunk_sorted[i] = sorted[i].TranslateTo(indices_.data(), chunked_indices_begin); } struct Visitor { @@ -877,8 +870,7 @@ class TableSorter { const RecordBatchVector batches_; const SortOptions& options_; const std::vector sort_keys_; - uint64_t* indices_begin_; - uint64_t* indices_end_; + std::span indices_; Comparator comparator_; }; @@ -999,12 +991,11 @@ class SortIndicesMetaFunction : public MetaFunction { ARROW_ASSIGN_OR_RAISE(buffers[1], AllocateResizableBuffer(buffer_size, ctx->memory_pool())); auto out = std::make_shared(out_type, length, buffers, 0); - auto out_begin = out->GetMutableValues(1); - auto out_end = out_begin + length; - std::iota(out_begin, out_end, 0); + std::span out_span{out->GetMutableValues(1), + static_cast(length)}; + std::iota(out_span.begin(), out_span.end(), 0); - RETURN_NOT_OK( - SortChunkedArray(ctx, out_begin, out_end, chunked_array, order, null_placement)); + RETURN_NOT_OK(SortChunkedArray(ctx, out_span, chunked_array, order, null_placement)); return Datum(out); } @@ -1029,15 +1020,15 @@ class SortIndicesMetaFunction : public MetaFunction { ARROW_ASSIGN_OR_RAISE(buffers[1], AllocateResizableBuffer(buffer_size, ctx->memory_pool())); auto out = std::make_shared(out_type, length, buffers, 0); - auto out_begin = out->GetMutableValues(1); - auto out_end = out_begin + length; - std::iota(out_begin, out_end, 0); + std::span out_span{out->GetMutableValues(1), + static_cast(length)}; + std::iota(out_span.begin(), out_span.end(), 0); if (n_sort_keys <= kMaxRadixSortKeys) { - RadixRecordBatchSorter sorter(out_begin, out_end, std::move(sort_keys)); + RadixRecordBatchSorter sorter(out_span, std::move(sort_keys)); ARROW_RETURN_NOT_OK(sorter.Sort()); } else { - MultipleKeyRecordBatchSorter sorter(out_begin, out_end, std::move(sort_keys)); + MultipleKeyRecordBatchSorter sorter(out_span, std::move(sort_keys)); ARROW_RETURN_NOT_OK(sorter.Sort()); } return Datum(out); @@ -1069,11 +1060,11 @@ class SortIndicesMetaFunction : public MetaFunction { ARROW_ASSIGN_OR_RAISE(buffers[1], AllocateResizableBuffer(buffer_size, ctx->memory_pool())); auto out = std::make_shared(out_type, length, buffers, 0); - auto out_begin = out->GetMutableValues(1); - auto out_end = out_begin + length; - std::iota(out_begin, out_end, 0); + std::span out_span{out->GetMutableValues(1), + static_cast(length)}; + std::iota(out_span.begin(), out_span.end(), 0); - TableSorter sorter(ctx, out_begin, out_end, table, options); + TableSorter sorter(ctx, out_span, table, options); RETURN_NOT_OK(sorter.Sort()); return Datum(out); @@ -1145,30 +1136,29 @@ Result> FindSortKeys(const Schema& schema, return SortFieldPopulator{}.FindSortKeys(schema, sort_keys); } -Result SortChunkedArray(ExecContext* ctx, uint64_t* indices_begin, - uint64_t* indices_end, +Result SortChunkedArray(ExecContext* ctx, + std::span indices, const ChunkedArray& chunked_array, SortOrder sort_order, NullPlacement null_placement) { auto physical_type = GetPhysicalType(chunked_array.type()); auto physical_chunks = GetPhysicalChunks(chunked_array, physical_type); - return SortChunkedArray(ctx, indices_begin, indices_end, physical_type, physical_chunks, - sort_order, null_placement); + return SortChunkedArray(ctx, indices, physical_type, physical_chunks, sort_order, + null_placement); } Result SortChunkedArray( - ExecContext* ctx, uint64_t* indices_begin, uint64_t* indices_end, + ExecContext* ctx, std::span indices, const std::shared_ptr& physical_type, const ArrayVector& physical_chunks, SortOrder sort_order, NullPlacement null_placement) { NullPartitionResult output; - ChunkedArraySorter sorter(ctx, indices_begin, indices_end, physical_type, - physical_chunks, sort_order, null_placement, &output); + ChunkedArraySorter sorter(ctx, indices, physical_type, physical_chunks, sort_order, + null_placement, &output); RETURN_NOT_OK(sorter.Sort()); return output; } -Result SortStructArray(ExecContext* ctx, uint64_t* indices_begin, - uint64_t* indices_end, +Result SortStructArray(ExecContext* ctx, std::span indices, const StructArray& array, SortOrder sort_order, NullPlacement null_placement) { @@ -1185,10 +1175,10 @@ Result SortStructArray(ExecContext* ctx, uint64_t* indices_ ARROW_ASSIGN_OR_RAISE(auto sort_keys, ResolveRecordBatchSortKeys(*batch, options.GetSortKeys())); if (sort_keys.size() <= kMaxRadixSortKeys) { - RadixRecordBatchSorter sorter(indices_begin, indices_end, std::move(sort_keys)); + RadixRecordBatchSorter sorter(indices, std::move(sort_keys)); return sorter.Sort(); } else { - MultipleKeyRecordBatchSorter sorter(indices_begin, indices_end, std::move(sort_keys)); + MultipleKeyRecordBatchSorter sorter(indices, std::move(sort_keys)); return sorter.Sort(); } } From 5a620188cc45ceec95cceef2e46a6e3a86d92a05 Mon Sep 17 00:00:00 2001 From: Alexander Taepper Date: Wed, 24 Jun 2026 21:05:40 +0200 Subject: [PATCH 05/12] spanify working --- .../arrow/compute/kernels/chunked_internal.cc | 20 +++--- .../arrow/compute/kernels/chunked_internal.h | 22 ++----- cpp/src/arrow/compute/kernels/vector_rank.cc | 51 +++++++-------- cpp/src/arrow/compute/kernels/vector_sort.cc | 11 ++-- .../compute/kernels/vector_sort_internal.h | 65 +++++++++---------- 5 files changed, 79 insertions(+), 90 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/chunked_internal.cc b/cpp/src/arrow/compute/kernels/chunked_internal.cc index 39495ea0e681..38a37ca4eff2 100644 --- a/cpp/src/arrow/compute/kernels/chunked_internal.cc +++ b/cpp/src/arrow/compute/kernels/chunked_internal.cc @@ -67,13 +67,13 @@ ChunkedIndexMapper::LogicalToPhysical() { } } - const int64_t num_indices = static_cast(indices_end_ - indices_begin_); + const int64_t num_indices = static_cast(indices_.size()); DCHECK_EQ(num_indices, std::accumulate(chunk_lengths_.begin(), chunk_lengths_.end(), static_cast(0))); CompressedChunkLocation* physical_begin = - reinterpret_cast(indices_begin_); - DCHECK_EQ(physical_begin + num_indices, - reinterpret_cast(indices_end_)); + reinterpret_cast(indices_.data()); + DCHECK_EQ(physical_begin + num_indices, reinterpret_cast( + indices_.data() + indices_.size())); int64_t chunk_offset = 0; for (int64_t chunk_index = 0; chunk_index < static_cast(chunk_lengths_.size()); @@ -82,12 +82,12 @@ ChunkedIndexMapper::LogicalToPhysical() { for (int64_t i = 0; i < chunk_length; ++i) { // Logical indices are expected to be chunk-partitioned, which avoids costly // chunked index resolution. - DCHECK_GE(indices_begin_[chunk_offset + i], static_cast(chunk_offset)); - DCHECK_LT(indices_begin_[chunk_offset + i], + DCHECK_GE(indices_[chunk_offset + i], static_cast(chunk_offset)); + DCHECK_LT(indices_[chunk_offset + i], static_cast(chunk_offset + chunk_length)); physical_begin[chunk_offset + i] = CompressedChunkLocation{ static_cast(chunk_index), - indices_begin_[chunk_offset + i] - static_cast(chunk_offset)}; + indices_[chunk_offset + i] - static_cast(chunk_offset)}; } chunk_offset += chunk_length; } @@ -105,15 +105,15 @@ Status ChunkedIndexMapper::PhysicalToLogical() { } } - const int64_t num_indices = static_cast(indices_end_ - indices_begin_); + const int64_t num_indices = static_cast(indices_.size()); CompressedChunkLocation* physical_begin = - reinterpret_cast(indices_begin_); + reinterpret_cast(indices_.data()); for (int64_t i = 0; i < num_indices; ++i) { const auto loc = physical_begin[i]; DCHECK_LT(loc.chunk_index(), chunk_offsets.size()); DCHECK_LT(loc.index_in_chunk(), static_cast(chunk_lengths_[loc.chunk_index()])); - indices_begin_[i] = + indices_[i] = chunk_offsets[loc.chunk_index()] + static_cast(loc.index_in_chunk()); } diff --git a/cpp/src/arrow/compute/kernels/chunked_internal.h b/cpp/src/arrow/compute/kernels/chunked_internal.h index 2dcfa0047ea0..ed02070330f6 100644 --- a/cpp/src/arrow/compute/kernels/chunked_internal.h +++ b/cpp/src/arrow/compute/kernels/chunked_internal.h @@ -127,19 +127,12 @@ ARROW_EXPORT std::vector GetArrayPointers(const ArrayVector& array // and vice-versa. class ARROW_EXPORT ChunkedIndexMapper { public: - ChunkedIndexMapper(const std::vector& chunks, uint64_t* indices_begin, - uint64_t* indices_end) - : ChunkedIndexMapper(std::span(chunks), indices_begin, indices_end) {} - ChunkedIndexMapper(std::span chunks, uint64_t* indices_begin, - uint64_t* indices_end) - : chunk_lengths_(GetChunkLengths(chunks)), - indices_begin_(indices_begin), - indices_end_(indices_end) {} - ChunkedIndexMapper(const RecordBatchVector& chunks, uint64_t* indices_begin, - uint64_t* indices_end) - : chunk_lengths_(GetChunkLengths(chunks)), - indices_begin_(indices_begin), - indices_end_(indices_end) {} + ChunkedIndexMapper(const std::vector& chunks, std::span indices) + : ChunkedIndexMapper(std::span(chunks), indices) {} + ChunkedIndexMapper(std::span chunks, std::span indices) + : chunk_lengths_(GetChunkLengths(chunks)), indices_(indices) {} + ChunkedIndexMapper(const RecordBatchVector& chunks, std::span indices) + : chunk_lengths_(GetChunkLengths(chunks)), indices_(indices) {} // Turn the original uint64_t logical indices into physical. This reuses the // same memory area, so the logical indices cannot be used anymore until @@ -158,8 +151,7 @@ class ARROW_EXPORT ChunkedIndexMapper { static std::vector GetChunkLengths(const RecordBatchVector& chunks); std::vector chunk_lengths_; - uint64_t* indices_begin_; - uint64_t* indices_end_; + std::span indices_; }; } // namespace arrow::compute::internal diff --git a/cpp/src/arrow/compute/kernels/vector_rank.cc b/cpp/src/arrow/compute/kernels/vector_rank.cc index d32dd359c76d..05f06118e1cc 100644 --- a/cpp/src/arrow/compute/kernels/vector_rank.cc +++ b/cpp/src/arrow/compute/kernels/vector_rank.cc @@ -71,7 +71,7 @@ void MarkDuplicates(const NullPartitionResult& sorted, ValueSelector&& value_sel template Result DoSortAndMarkDuplicate( - ExecContext* ctx, uint64_t* indices_begin, uint64_t* indices_end, const Array& input, + ExecContext* ctx, std::span indices, const Array& input, const std::shared_ptr& physical_type, const SortOrder order, const NullPlacement null_placement, bool needs_duplicates) { using GetView = GetViewType; @@ -80,9 +80,9 @@ Result DoSortAndMarkDuplicate( ARROW_ASSIGN_OR_RAISE(auto array_sorter, GetArraySorter(*physical_type)); ArrayType array(input.data()); - ARROW_ASSIGN_OR_RAISE(auto sorted, - array_sorter(indices_begin, indices_end, array, 0, - ArraySortOptions(order, null_placement), ctx)); + ARROW_ASSIGN_OR_RAISE( + auto sorted, + array_sorter(indices, array, 0, ArraySortOptions(order, null_placement), ctx)); if (needs_duplicates) { auto value_selector = [&array](int64_t index) { @@ -100,16 +100,16 @@ Result DoSortAndMarkDuplicate( template Result DoSortAndMarkDuplicate( - ExecContext* ctx, uint64_t* indices_begin, uint64_t* indices_end, - const ChunkedArray& input, const std::shared_ptr& physical_type, - const SortOrder order, const NullPlacement null_placement, bool needs_duplicates) { + ExecContext* ctx, std::span indices, const ChunkedArray& input, + const std::shared_ptr& physical_type, const SortOrder order, + const NullPlacement null_placement, bool needs_duplicates) { auto physical_chunks = GetPhysicalChunks(input, physical_type); if (physical_chunks.empty()) { return NullPartitionResult{}; } - ARROW_ASSIGN_OR_RAISE(auto sorted, - SortChunkedArray(ctx, indices_begin, indices_end, physical_type, - physical_chunks, order, null_placement)); + ARROW_ASSIGN_OR_RAISE( + auto sorted, SortChunkedArray(ctx, indices, physical_type, physical_chunks, order, + null_placement)); if (needs_duplicates) { const auto arrays = GetArrayPointers(physical_chunks); auto value_selector = [resolver = @@ -132,13 +132,12 @@ Result DoSortAndMarkDuplicate( template class SortAndMarkDuplicate : public TypeVisitor { public: - SortAndMarkDuplicate(ExecContext* ctx, uint64_t* indices_begin, uint64_t* indices_end, + SortAndMarkDuplicate(ExecContext* ctx, std::span indices, const InputType& input, const SortOrder order, const NullPlacement null_placement, const bool needs_duplicate) : TypeVisitor(), ctx_(ctx), - indices_begin_(indices_begin), - indices_end_(indices_end), + indices_(indices), input_(input), order_(order), null_placement_(null_placement), @@ -150,13 +149,12 @@ class SortAndMarkDuplicate : public TypeVisitor { return sorted_; } -#define VISIT(TYPE) \ - Status Visit(const TYPE& type) { \ - ARROW_ASSIGN_OR_RAISE( \ - sorted_, DoSortAndMarkDuplicate(ctx_, indices_begin_, indices_end_, \ - input_, physical_type_, order_, \ - null_placement_, needs_duplicates_)); \ - return Status::OK(); \ +#define VISIT(TYPE) \ + Status Visit(const TYPE& type) { \ + ARROW_ASSIGN_OR_RAISE(sorted_, DoSortAndMarkDuplicate( \ + ctx_, indices_, input_, physical_type_, order_, \ + null_placement_, needs_duplicates_)); \ + return Status::OK(); \ } VISIT_SORTABLE_PHYSICAL_TYPES(VISIT) @@ -165,8 +163,7 @@ class SortAndMarkDuplicate : public TypeVisitor { private: ExecContext* ctx_; - uint64_t* indices_begin_; - uint64_t* indices_end_; + std::span indices_; const InputType& input_; const SortOrder order_; const NullPlacement null_placement_; @@ -379,13 +376,13 @@ class RankMetaFunctionBase : public MetaFunction { int64_t length = input.length(); ARROW_ASSIGN_OR_RAISE(auto indices, MakeMutableUInt64Array(length, ctx->memory_pool())); - auto* indices_begin = indices->GetMutableValues(1); - auto* indices_end = indices_begin + length; - std::iota(indices_begin, indices_end, 0); + std::span indices_span{indices->GetMutableValues(1), + static_cast(length)}; + std::iota(indices_span.begin(), indices_span.end(), 0); auto needs_duplicates = Derived::NeedsDuplicates(options); ARROW_ASSIGN_OR_RAISE( - auto sorted, SortAndMarkDuplicate(ctx, indices_begin, indices_end, input, order, - null_placement, needs_duplicates) + auto sorted, SortAndMarkDuplicate(ctx, indices_span, input, order, null_placement, + needs_duplicates) .Run()); auto ranker = Derived::GetRanker(options); diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc index 3a03bbb6830b..85d0dc3c5de2 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_sort.cc @@ -103,15 +103,18 @@ class ChunkedArraySorter : public TypeVisitor { const auto array = checked_cast(arrays[i]); end_offset += array->length(); null_count += array->null_count(); - ARROW_ASSIGN_OR_RAISE(sorted[i], - array_sorter_(indices_.data(), indices_.data() + indices_.size(), *array, begin_offset, options, ctx_)); + // TODO.TAE + ARROW_ASSIGN_OR_RAISE( + sorted[i], array_sorter_(std::span(indices_.data() + begin_offset, + indices_.data() + end_offset), + *array, begin_offset, options, ctx_)); begin_offset = end_offset; } DCHECK_EQ(end_offset, num_indices); // Then merge them by pairs, recursively if (sorted.size() > 1) { - ChunkedIndexMapper chunked_mapper(arrays, indices_.data(), indices_.data() + indices_.size()); + ChunkedIndexMapper chunked_mapper(arrays, indices_); ARROW_ASSIGN_OR_RAISE(auto chunked_indices_pair, chunked_mapper.LogicalToPhysical()); auto [chunked_indices_begin, chunked_indices_end] = chunked_indices_pair; @@ -686,7 +689,7 @@ class TableSorter { // Then merge them by pairs, recursively if (sorted.size() > 1) { - ChunkedIndexMapper chunked_mapper(batches_, indices_.data(), indices_.data() + indices_.size()); + ChunkedIndexMapper chunked_mapper(batches_, indices_); ARROW_ASSIGN_OR_RAISE(auto chunked_indices_pair, chunked_mapper.LogicalToPhysical()); auto [chunked_indices_begin, chunked_indices_end] = chunked_indices_pair; diff --git a/cpp/src/arrow/compute/kernels/vector_sort_internal.h b/cpp/src/arrow/compute/kernels/vector_sort_internal.h index 671991d1d3c3..5ca4bb1d67cf 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort_internal.h +++ b/cpp/src/arrow/compute/kernels/vector_sort_internal.h @@ -367,33 +367,34 @@ PartitionResultByNullLikeness PartitionNullsAndNans(uint64_t* indices_begin, .nan_range = {q.nulls_begin, q.nulls_end}}; } -template -struct GenericMergeImpl { - using MergeNullsFunc = std::function; +struct ChunkedMergeImpl { + using MergeNullsFunc = std::function; - using MergeNonNullsFunc = - std::function; + using MergeNonNullsFunc = std::function; - GenericMergeImpl(NullPlacement null_placement, MergeNullsFunc&& merge_nulls, + ChunkedMergeImpl(NullPlacement null_placement, MergeNullsFunc&& merge_nulls, MergeNonNullsFunc&& merge_non_nulls) : null_placement_(null_placement), merge_nulls_(std::move(merge_nulls)), merge_non_nulls_(std::move(merge_non_nulls)) {} Status Init(ExecContext* ctx, int64_t temp_indices_length) { - ARROW_ASSIGN_OR_RAISE( - temp_buffer_, - AllocateBuffer(sizeof(IndexType) * temp_indices_length, ctx->memory_pool())); - temp_indices_ = reinterpret_cast(temp_buffer_->mutable_data()); + ARROW_ASSIGN_OR_RAISE(temp_buffer_, AllocateBuffer(sizeof(CompressedChunkLocation) * + temp_indices_length, + ctx->memory_pool())); + temp_indices_ = + reinterpret_cast(temp_buffer_->mutable_data()); return Status::OK(); } - NullPartitionResultType Merge(const NullPartitionResultType& left, - const NullPartitionResultType& right, - int64_t null_count) const { + ChunkedNullPartitionResult Merge(const ChunkedNullPartitionResult& left, + const ChunkedNullPartitionResult& right, + int64_t null_count) const { if (null_placement_ == NullPlacement::AtStart) { return MergeNullsAtStart(left, right, null_count); } else { @@ -401,9 +402,9 @@ struct GenericMergeImpl { } } - NullPartitionResultType MergeNullsAtStart(const NullPartitionResultType& left, - const NullPartitionResultType& right, - int64_t null_count) const { + ChunkedNullPartitionResult MergeNullsAtStart(const ChunkedNullPartitionResult& left, + const ChunkedNullPartitionResult& right, + int64_t null_count) const { // Input layout: // [left nulls .... left non-nulls .... right nulls .... right non-nulls] ARROW_DCHECK_EQ(left.nulls_end, left.non_nulls_begin); @@ -414,7 +415,7 @@ struct GenericMergeImpl { // [left nulls .... right nulls .... left non-nulls .... right non-nulls] std::rotate(left.non_nulls_begin, right.nulls_begin, right.nulls_end); - const auto p = NullPartitionResultType::NullsAtStart( + const auto p = ChunkedNullPartitionResult::NullsAtStart( left.nulls_begin, right.non_nulls_end, left.nulls_begin + left.null_count() + right.null_count()); @@ -436,9 +437,9 @@ struct GenericMergeImpl { return p; } - NullPartitionResultType MergeNullsAtEnd(const NullPartitionResultType& left, - const NullPartitionResultType& right, - int64_t null_count) const { + ChunkedNullPartitionResult MergeNullsAtEnd(const ChunkedNullPartitionResult& left, + const ChunkedNullPartitionResult& right, + int64_t null_count) const { // Input layout: // [left non-nulls .... left nulls .... right non-nulls .... right nulls] ARROW_DCHECK_EQ(left.non_nulls_end, left.nulls_begin); @@ -449,7 +450,7 @@ struct GenericMergeImpl { // [left non-nulls .... right non-nulls .... left nulls .... right nulls] std::rotate(left.nulls_begin, right.non_nulls_begin, right.non_nulls_end); - const auto p = NullPartitionResultType::NullsAtEnd( + const auto p = ChunkedNullPartitionResult::NullsAtEnd( left.non_nulls_begin, right.nulls_end, left.non_nulls_begin + left.non_null_count() + right.non_null_count()); @@ -476,34 +477,30 @@ struct GenericMergeImpl { MergeNullsFunc merge_nulls_; MergeNonNullsFunc merge_non_nulls_; std::unique_ptr temp_buffer_; - IndexType* temp_indices_ = nullptr; + CompressedChunkLocation* temp_indices_ = nullptr; }; -using ChunkedMergeImpl = - GenericMergeImpl; - // TODO make this usable if indices are non trivial on input // (see ConcreteRecordBatchColumnSorter) // `offset` is used when this is called on a chunk of a chunked array using ArraySortFunc = std::function( - uint64_t* indices_begin, uint64_t* indices_end, const Array& values, int64_t offset, + std::span indices, const Array& values, int64_t offset, const ArraySortOptions& options, ExecContext* ctx)>; Result GetArraySorter(const DataType& type); -Result SortChunkedArray(ExecContext* ctx, uint64_t* indices_begin, - uint64_t* indices_end, +Result SortChunkedArray(ExecContext* ctx, + std::span indices, const ChunkedArray& chunked_array, SortOrder sort_order, NullPlacement null_placement); Result SortChunkedArray( - ExecContext* ctx, uint64_t* indices_begin, uint64_t* indices_end, + ExecContext* ctx, std::span indices, const std::shared_ptr& physical_type, const ArrayVector& physical_chunks, SortOrder sort_order, NullPlacement null_placement); -Result SortStructArray(ExecContext* ctx, uint64_t* indices_begin, - uint64_t* indices_end, +Result SortStructArray(ExecContext* ctx, std::span indices, const StructArray& array, SortOrder sort_order, NullPlacement null_placement); From 36f348f20e07387f47dabeaf357968f3d5fdafed Mon Sep 17 00:00:00 2001 From: Alexander Taepper Date: Wed, 24 Jun 2026 21:56:35 +0200 Subject: [PATCH 06/12] more spans --- cpp/src/arrow/compute/kernels/vector_sort.cc | 56 ++++++++++---------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc index 85d0dc3c5de2..8c37b3643368 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_sort.cc @@ -231,26 +231,26 @@ class ChunkedArraySorter : public TypeVisitor { // Visit contiguous ranges of equal values. All entries are assumed // to be non-null. template -void VisitConstantRanges(const ArrayType& array, uint64_t* indices_begin, - uint64_t* indices_end, int64_t offset, Visitor&& visit) { +void VisitConstantRanges(const ArrayType& array, std::span indices, + int64_t offset, Visitor&& visit) { using GetView = GetViewType; - if (indices_begin == indices_end) { + if (indices.empty()) { return; } - auto range_start = indices_begin; + auto range_start = indices.begin(); auto range_cur = range_start; auto last_value = GetView::LogicalValue(array.GetView(*range_cur - offset)); - while (++range_cur != indices_end) { + while (++range_cur != indices.end()) { auto v = GetView::LogicalValue(array.GetView(*range_cur - offset)); if (v != last_value) { - visit(range_start, range_cur); + visit(std::span{range_start, range_cur}); range_start = range_cur; last_value = v; } } - if (range_start != range_cur) { - visit(range_start, range_cur); + if (range_start != indices.end()) { + visit({range_start, indices.end()}); } } @@ -262,8 +262,7 @@ class RecordBatchColumnSorter { : next_column_(next_column) {} virtual ~RecordBatchColumnSorter() {} - virtual NullPartitionResult SortRange(uint64_t* indices_begin, uint64_t* indices_end, - int64_t offset) = 0; + virtual NullPartitionResult SortRange(std::span indices, int64_t offset) = 0; protected: RecordBatchColumnSorter* next_column_; @@ -284,17 +283,18 @@ class ConcreteRecordBatchColumnSorter : public RecordBatchColumnSorter { null_placement_(null_placement), null_count_(array_.null_count()) {} - NullPartitionResult SortRange(uint64_t* indices_begin, uint64_t* indices_end, - int64_t offset) override { + NullPartitionResult SortRange(std::span indices, int64_t offset) override { using GetView = GetViewType; NullPartitionResult p; if (null_count_ == 0) { - p = NullPartitionResult::NoNulls(indices_begin, indices_end, null_placement_); + p = NullPartitionResult::NoNulls(indices.data(), indices.data() + indices.size(), + null_placement_); } else { // NOTE that null_count_ is merely an upper bound on the number of nulls // in this particular range. - p = PartitionNullsOnly(indices_begin, indices_end, array_, + p = PartitionNullsOnly(indices.data(), + indices.data() + indices.size(), array_, offset, null_placement_); DCHECK_LE(p.nulls_end - p.nulls_begin, null_count_); } @@ -325,22 +325,21 @@ class ConcreteRecordBatchColumnSorter : public RecordBatchColumnSorter { if (next_column_ != nullptr) { // Visit all ranges of equal values in this column and sort them on // the next column. - SortNextColumn(q.nulls_begin, q.nulls_end, offset); - SortNextColumn(p.nulls_begin, p.nulls_end, offset); - VisitConstantRanges(array_, q.non_nulls_begin, q.non_nulls_end, offset, - [&](uint64_t* range_start, uint64_t* range_end) { - SortNextColumn(range_start, range_end, offset); - }); + SortNextColumn({q.nulls_begin, q.nulls_end}, offset); + SortNextColumn({p.nulls_begin, p.nulls_end}, offset); + VisitConstantRanges( + array_, {q.non_nulls_begin, q.non_nulls_end}, offset, + [&](std::span indices) { SortNextColumn(indices, offset); }); } return NullPartitionResult{q.non_nulls_begin, q.non_nulls_end, std::min(q.nulls_begin, p.nulls_begin), std::max(q.nulls_end, p.nulls_end)}; } - void SortNextColumn(uint64_t* indices_begin, uint64_t* indices_end, int64_t offset) { + void SortNextColumn(std::span indices, int64_t offset) { // Avoid the cost of a virtual method call in trivial cases - if (indices_end - indices_begin > 1) { - next_column_->SortRange(indices_begin, indices_end, offset); + if (indices.size() > 1) { + next_column_->SortRange(indices, offset); } } @@ -360,12 +359,12 @@ class ConcreteRecordBatchColumnSorter : public RecordBatchColumnSorter RecordBatchColumnSorter* next_column = nullptr) : RecordBatchColumnSorter(next_column), null_placement_(null_placement) {} - NullPartitionResult SortRange(uint64_t* indices_begin, uint64_t* indices_end, - int64_t offset) { + NullPartitionResult SortRange(std::span indices, int64_t offset) { if (next_column_ != nullptr) { - next_column_->SortRange(indices_begin, indices_end, offset); + next_column_->SortRange(indices, offset); } - return NullPartitionResult::NullsOnly(indices_begin, indices_end, null_placement_); + return NullPartitionResult::NullsOnly(indices.data(), indices.data() + indices.size(), + null_placement_); } protected: @@ -421,8 +420,7 @@ class RadixRecordBatchSorter { } // Sort from left to right - return column_sorts.front()->SortRange(indices_.data(), - indices_.data() + indices_.size(), offset); + return column_sorts.front()->SortRange(indices_, offset); } protected: From 7c77d23733e32cebb8098514cc8f46962fb6ab4a Mon Sep 17 00:00:00 2001 From: Alexander Taepper Date: Wed, 24 Jun 2026 22:12:53 +0200 Subject: [PATCH 07/12] another span --- cpp/src/arrow/compute/kernels/vector_sort.cc | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc index 8c37b3643368..9dae5582869c 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_sort.cc @@ -97,20 +97,17 @@ class ChunkedArraySorter : public TypeVisitor { // First sort all individual chunks int64_t begin_offset = 0; - int64_t end_offset = 0; int64_t null_count = 0; for (int i = 0; i < num_chunks; ++i) { const auto array = checked_cast(arrays[i]); - end_offset += array->length(); + const auto array_length = array->length(); null_count += array->null_count(); - // TODO.TAE ARROW_ASSIGN_OR_RAISE( - sorted[i], array_sorter_(std::span(indices_.data() + begin_offset, - indices_.data() + end_offset), - *array, begin_offset, options, ctx_)); - begin_offset = end_offset; + sorted[i], array_sorter_(indices_.subspan(begin_offset, array_length), *array, + begin_offset, options, ctx_)); + begin_offset += array_length; } - DCHECK_EQ(end_offset, num_indices); + DCHECK_EQ(begin_offset, num_indices); // Then merge them by pairs, recursively if (sorted.size() > 1) { From c0fd6ec25f52a6f3a20f43ff5b1cddd033927ffd Mon Sep 17 00:00:00 2001 From: Alexander Taepper Date: Wed, 24 Jun 2026 23:32:14 +0200 Subject: [PATCH 08/12] replace Partition usage --- .../compute/kernels/vector_array_sort.cc | 6 +- .../arrow/compute/kernels/vector_select_k.cc | 20 ++--- cpp/src/arrow/compute/kernels/vector_sort.cc | 77 +++++++++---------- .../compute/kernels/vector_sort_internal.h | 22 +++++- 4 files changed, 64 insertions(+), 61 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_array_sort.cc b/cpp/src/arrow/compute/kernels/vector_array_sort.cc index d7d3d30df8ae..8e21d915eb3e 100644 --- a/cpp/src/arrow/compute/kernels/vector_array_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_array_sort.cc @@ -78,8 +78,7 @@ struct PartitionNthToIndices { return Status::OK(); } const auto p = PartitionNullsAndNans( - out_span.data(), out_span.data() + out_span.size(), arr, 0, - options.null_placement); + out_span, arr, 0, options.null_placement); auto nth_begin = out_span.begin() + pivot; if (nth_begin >= p.non_null_like_range.begin() && nth_begin < p.non_null_like_range.end()) { @@ -154,8 +153,7 @@ class ArrayCompareSorter { const auto& values = checked_cast(array); const auto p = PartitionNullsAndNans( - indices.data(), indices.data() + indices.size(), values, offset, - options.null_placement); + indices, values, offset, options.null_placement); if (options.order == SortOrder::Ascending) { std::stable_sort( p.non_null_like_range.begin(), p.non_null_like_range.end(), diff --git a/cpp/src/arrow/compute/kernels/vector_select_k.cc b/cpp/src/arrow/compute/kernels/vector_select_k.cc index 4cba5db58eae..ed22625fa8ef 100644 --- a/cpp/src/arrow/compute/kernels/vector_select_k.cc +++ b/cpp/src/arrow/compute/kernels/vector_select_k.cc @@ -205,16 +205,14 @@ class ArraySelector : public TypeVisitor { std::vector indices(arr.length()); - uint64_t* indices_begin = indices.data(); - uint64_t* indices_end = indices_begin + indices.size(); - std::iota(indices_begin, indices_end, 0); + std::iota(indices.begin(), indices.end(), 0); ARROW_ASSIGN_OR_RAISE(auto take_indices, MakeMutableUInt64Array(k_, ctx_->memory_pool())); auto* output_begin = take_indices->template GetMutableValues(1); const auto p = PartitionNullsAndNans( - indices_begin, indices_end, arr, 0, null_placement_); + indices, arr, 0, null_placement_); // From k, calculate // l = non_null_like elements to take from PartitionResult @@ -317,13 +315,11 @@ class ChunkedArraySelector : public TypeVisitor { auto& indices = indices_by_chunk.emplace_back(); indices.resize(arr.length()); - uint64_t* indices_begin = indices.data(); - uint64_t* indices_end = indices_begin + indices.size(); - std::iota(indices_begin, indices_end, 0); + std::iota(indices.begin(), indices.end(), 0); partitions_by_chunk.emplace_back( - PartitionNullsAndNans( - indices_begin, indices_end, arr, 0, null_placement_)); + PartitionNullsAndNans(indices, arr, 0, + null_placement_)); null_count += partitions_by_chunk.back().null_range.size(); nan_count += partitions_by_chunk.back().nan_range.size(); @@ -472,12 +468,8 @@ class RecordBatchSelector { const auto& first_remaining_sort_key = selector_->sort_keys_[start_sort_key_index_]; const auto& arr = checked_cast(first_remaining_sort_key.array); - uint64_t* input_indices_begin = input_indices_.data(); - uint64_t* input_indices_end = input_indices_.data() + input_indices_.size(); - const auto p = PartitionNullsAndNans( - input_indices_begin, input_indices_end, arr, 0, - first_remaining_sort_key.null_placement); + input_indices_, arr, 0, first_remaining_sort_key.null_placement); // From k = output_indices_.size(), calculate // l = non_null_like elements to take from PartitionResult diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc index 9dae5582869c..599e79845de7 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_sort.cc @@ -133,8 +133,9 @@ class ChunkedArraySorter : public TypeVisitor { auto merge_non_nulls = [&](CompressedChunkLocation* range_begin, CompressedChunkLocation* range_middle, CompressedChunkLocation* range_end, CompressedChunkLocation* temp_indices) { - MergeNonNulls(range_begin, range_middle, range_end, arrays, - temp_indices); + MergeNonNulls( + {range_begin, range_middle}, {range_middle, range_end}, arrays, + {temp_indices, static_cast(range_end - range_begin)}); }; ChunkedMergeImpl merge_impl{null_placement_, std::move(merge_nulls), @@ -177,31 +178,33 @@ class ChunkedArraySorter : public TypeVisitor { } template - void MergeNonNulls(CompressedChunkLocation* range_begin, - CompressedChunkLocation* range_middle, - CompressedChunkLocation* range_end, + void MergeNonNulls(std::span left, + std::span right, std::span arrays, - CompressedChunkLocation* temp_indices) { + std::span temp_indices) { using ArrowType = typename ArrayType::TypeClass; if (order_ == SortOrder::Ascending) { - std::merge(range_begin, range_middle, range_middle, range_end, temp_indices, - [&](CompressedChunkLocation left, CompressedChunkLocation right) { - return ChunkValue(arrays, left) < - ChunkValue(arrays, right); - }); + std::ranges::merge( + left, right, temp_indices.begin(), + [&](CompressedChunkLocation left, CompressedChunkLocation right) { + return ChunkValue(arrays, left) < + ChunkValue(arrays, right); + }); } else { - std::merge(range_begin, range_middle, range_middle, range_end, temp_indices, - [&](CompressedChunkLocation left, CompressedChunkLocation right) { - // We don't use 'left > right' here to reduce required - // operator. If we use 'right < left' here, '<' is only - // required. - return ChunkValue(arrays, right) < - ChunkValue(arrays, left); - }); + std::ranges::merge( + left, right, temp_indices.begin(), + [&](CompressedChunkLocation left, CompressedChunkLocation right) { + // We don't use 'left > right' here to reduce required + // operator. If we use 'right < left' here, '<' is only + // required. + return ChunkValue(arrays, right) < + ChunkValue(arrays, left); + }); } // Copy back temp area into main buffer - std::copy(temp_indices, temp_indices + (range_end - range_begin), range_begin); + std::ranges::copy(temp_indices.begin(), + temp_indices.begin() + left.size() + right.size(), left.begin()); } template @@ -283,34 +286,32 @@ class ConcreteRecordBatchColumnSorter : public RecordBatchColumnSorter { NullPartitionResult SortRange(std::span indices, int64_t offset) override { using GetView = GetViewType; - NullPartitionResult p; + PartitionResultByNullLikeness partitions; if (null_count_ == 0) { - p = NullPartitionResult::NoNulls(indices.data(), indices.data() + indices.size(), - null_placement_); + partitions = PartitionNansOnly( + indices, array_, offset, null_placement_); + } else { // NOTE that null_count_ is merely an upper bound on the number of nulls // in this particular range. - p = PartitionNullsOnly(indices.data(), - indices.data() + indices.size(), array_, - offset, null_placement_); - DCHECK_LE(p.nulls_end - p.nulls_begin, null_count_); + partitions = PartitionNullsAndNans( + indices, array_, offset, null_placement_); + DCHECK_LE(static_cast(partitions.null_range.size()), null_count_); } - const NullPartitionResult q = PartitionNullLikes( - p.non_nulls_begin, p.non_nulls_end, array_, offset, null_placement_); // TODO This is roughly the same as ArrayCompareSorter. // Also, we would like to use a counting sort if possible. This requires // a counting sort compatible with indirect indexing. if (order_ == SortOrder::Ascending) { - std::stable_sort( - q.non_nulls_begin, q.non_nulls_end, [&](uint64_t left, uint64_t right) { + std::ranges::stable_sort( + partitions.non_null_like_range, [&](uint64_t left, uint64_t right) { const auto lhs = GetView::LogicalValue(array_.GetView(left - offset)); const auto rhs = GetView::LogicalValue(array_.GetView(right - offset)); return lhs < rhs; }); } else { - std::stable_sort( - q.non_nulls_begin, q.non_nulls_end, [&](uint64_t left, uint64_t right) { + std::ranges::stable_sort( + partitions.non_null_like_range, [&](uint64_t left, uint64_t right) { // We don't use 'left > right' here to reduce required operator. // If we use 'right < left' here, '<' is only required. const auto lhs = GetView::LogicalValue(array_.GetView(left - offset)); @@ -322,15 +323,13 @@ class ConcreteRecordBatchColumnSorter : public RecordBatchColumnSorter { if (next_column_ != nullptr) { // Visit all ranges of equal values in this column and sort them on // the next column. - SortNextColumn({q.nulls_begin, q.nulls_end}, offset); - SortNextColumn({p.nulls_begin, p.nulls_end}, offset); + SortNextColumn(partitions.null_range, offset); + SortNextColumn(partitions.nan_range, offset); VisitConstantRanges( - array_, {q.non_nulls_begin, q.non_nulls_end}, offset, + array_, partitions.non_null_like_range, offset, [&](std::span indices) { SortNextColumn(indices, offset); }); } - return NullPartitionResult{q.non_nulls_begin, q.non_nulls_end, - std::min(q.nulls_begin, p.nulls_begin), - std::max(q.nulls_end, p.nulls_end)}; + return partitions.toLegacyNullPartitionResult(); } void SortNextColumn(std::span indices, int64_t offset) { diff --git a/cpp/src/arrow/compute/kernels/vector_sort_internal.h b/cpp/src/arrow/compute/kernels/vector_sort_internal.h index 5ca4bb1d67cf..54df6cef105a 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort_internal.h +++ b/cpp/src/arrow/compute/kernels/vector_sort_internal.h @@ -351,14 +351,28 @@ NullPartitionResult PartitionNullLikes(uint64_t* indices_begin, uint64_t* indice } template -PartitionResultByNullLikeness PartitionNullsAndNans(uint64_t* indices_begin, - uint64_t* indices_end, +PartitionResultByNullLikeness PartitionNullsAndNans(std::span indices, const ArrayType& values, int64_t offset, NullPlacement null_placement) { // Partition nulls at start (resp. end), and null-like values just before (resp. after) - NullPartitionResult p = PartitionNullsOnly(indices_begin, indices_end, - values, offset, null_placement); + NullPartitionResult p = PartitionNullsOnly( + indices.data(), indices.data() + indices.size(), values, offset, null_placement); + NullPartitionResult q = PartitionNullLikes( + p.non_nulls_begin, p.non_nulls_end, values, offset, null_placement); + return PartitionResultByNullLikeness{ + .non_null_like_range = {q.non_nulls_begin, q.non_nulls_end}, + .null_range = {p.nulls_begin, p.nulls_end}, + .nan_range = {q.nulls_begin, q.nulls_end}}; +} + +template +PartitionResultByNullLikeness PartitionNansOnly(std::span indices, + const ArrayType& values, int64_t offset, + NullPlacement null_placement) { + // Partition nulls at start (resp. end), and null-like values just before (resp. after) + NullPartitionResult p = NullPartitionResult::NoNulls( + indices.data(), indices.data() + indices.size(), null_placement); NullPartitionResult q = PartitionNullLikes( p.non_nulls_begin, p.non_nulls_end, values, offset, null_placement); return PartitionResultByNullLikeness{ From 245c10fee7d3c3ea3473319b684b1bdd6f9c0c3e Mon Sep 17 00:00:00 2001 From: Alexander Taepper Date: Thu, 25 Jun 2026 00:49:14 +0200 Subject: [PATCH 09/12] Change interface to PartitionResultByNullLikeness --- .../compute/kernels/vector_array_sort.cc | 112 +++++---- cpp/src/arrow/compute/kernels/vector_rank.cc | 64 +++--- cpp/src/arrow/compute/kernels/vector_sort.cc | 98 ++++---- .../compute/kernels/vector_sort_internal.h | 213 ++++++++++-------- 4 files changed, 244 insertions(+), 243 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_array_sort.cc b/cpp/src/arrow/compute/kernels/vector_array_sort.cc index 8e21d915eb3e..1353018e812d 100644 --- a/cpp/src/arrow/compute/kernels/vector_array_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_array_sort.cc @@ -147,9 +147,10 @@ class ArrayCompareSorter { using GetView = GetViewType; public: - Result operator()(std::span indices, const Array& array, - int64_t offset, const ArraySortOptions& options, - ExecContext*) { + Result operator()(std::span indices, + const Array& array, int64_t offset, + const ArraySortOptions& options, + ExecContext*) { const auto& values = checked_cast(array); const auto p = PartitionNullsAndNans( @@ -173,16 +174,17 @@ class ArrayCompareSorter { return rhs < lhs; }); } - return p.toLegacyNullPartitionResult(); + return p; } }; template <> class ArrayCompareSorter { public: - Result operator()(std::span indices, const Array& array, - int64_t offset, const ArraySortOptions& options, - ExecContext* ctx) { + Result operator()(std::span indices, + const Array& array, int64_t offset, + const ArraySortOptions& options, + ExecContext* ctx) { const auto& dict_array = checked_cast(array); const auto& dict_values = dict_array.dictionary(); const auto& dict_indices = dict_array.indices(); @@ -265,9 +267,10 @@ class ArrayCompareSorter { template <> class ArrayCompareSorter { public: - Result operator()(std::span indices, const Array& array, - int64_t offset, const ArraySortOptions& options, - ExecContext* ctx) { + Result operator()(std::span indices, + const Array& array, int64_t offset, + const ArraySortOptions& options, + ExecContext* ctx) { const auto& struct_array = checked_cast(array); return SortStructArray(ctx, indices, struct_array, options.order, options.null_placement); @@ -290,9 +293,10 @@ class ArrayCountSorter { value_range_ = static_cast(max - min) + 1; } - Result operator()(std::span indices, const Array& array, - int64_t offset, const ArraySortOptions& options, - ExecContext*) const { + Result operator()(std::span indices, + const Array& array, int64_t offset, + const ArraySortOptions& options, + ExecContext*) const { const auto& values = checked_cast(array); // 32bit counter performs much better than 64bit one @@ -308,14 +312,14 @@ class ArrayCountSorter { uint32_t value_range_{0}; template - NullPartitionResult SortInternal(std::span indices, const ArrayType& values, - int64_t offset, - const ArraySortOptions& options) const { + PartitionResultByNullLikeness SortInternal(std::span indices, + const ArrayType& values, int64_t offset, + const ArraySortOptions& options) const { const uint32_t value_range = value_range_; // first and last slot reserved for prefix sum (depending on sort order) std::vector counts(2 + value_range); - NullPartitionResult p; + PartitionResultByNullLikeness p; if (options.order == SortOrder::Ascending) { // counts will be increasing, starting with 0 and ending with (length - null_count) @@ -324,15 +328,10 @@ class ArrayCountSorter { counts[i] += counts[i - 1]; } - if (options.null_placement == NullPlacement::AtStart) { - p = NullPartitionResult::NullsAtStart( - indices.data(), indices.data() + indices.size(), - indices.data() + indices.size() - counts[value_range]); - } else { - p = NullPartitionResult::NullsAtEnd(indices.data(), - indices.data() + indices.size(), - indices.data() + counts[value_range]); - } + p = PartitionResultByNullLikeness::fromCounts(indices, counts[value_range], 0, + indices.size() - counts[value_range], + options.null_placement); + EmitIndices(p, values, offset, &counts[0]); } else { // counts will be decreasing, starting with (length - null_count) and ending with 0 @@ -341,14 +340,8 @@ class ArrayCountSorter { counts[i - 1] += counts[i]; } - if (options.null_placement == NullPlacement::AtStart) { - p = NullPartitionResult::NullsAtStart( - indices.data(), indices.data() + indices.size(), - indices.data() + indices.size() - counts[0]); - } else { - p = NullPartitionResult::NullsAtEnd( - indices.data(), indices.data() + indices.size(), indices.data() + counts[0]); - } + p = PartitionResultByNullLikeness::fromCounts( + indices, counts[0], 0, indices.size() - counts[0], options.null_placement); EmitIndices(p, values, offset, &counts[1]); } return p; @@ -361,14 +354,14 @@ class ArrayCountSorter { } template - void EmitIndices(const NullPartitionResult& p, const ArrayType& values, int64_t offset, - CounterType* counts) const { + void EmitIndices(const PartitionResultByNullLikeness& p, const ArrayType& values, + int64_t offset, CounterType* counts) const { int64_t index = offset; CounterType count_nulls = 0; VisitRawValuesInline( *values.data(), - [&](c_type v) { p.non_nulls_begin[counts[v - min_]++] = index++; }, - [&]() { p.nulls_begin[count_nulls++] = index++; }); + [&](c_type v) { p.non_null_like_range[counts[v - min_]++] = index++; }, + [&]() { p.null_range[count_nulls++] = index++; }); } }; @@ -377,25 +370,22 @@ class ArrayCountSorter { public: ArrayCountSorter() = default; - Result operator()(std::span indices, const Array& array, - int64_t offset, const ArraySortOptions& options, - ExecContext*) { + Result operator()(std::span indices, + const Array& array, int64_t offset, + const ArraySortOptions& options, + ExecContext*) { const auto& values = checked_cast(array); std::array counts{0, 0, 0}; // false, true, null const int64_t nulls = values.null_count(); + const int64_t non_nulls = values.length() - nulls; + const int64_t ones = values.true_count(); - const int64_t zeros = values.length() - ones - nulls; + const int64_t zeros = non_nulls - ones; - NullPartitionResult p; - if (options.null_placement == NullPlacement::AtStart) { - p = NullPartitionResult::NullsAtStart( - indices.data(), indices.data() + indices.size(), indices.data() + nulls); - } else { - p = NullPartitionResult::NullsAtEnd(indices.data(), indices.data() + indices.size(), - indices.data() + indices.size() - nulls); - } + PartitionResultByNullLikeness p = PartitionResultByNullLikeness::fromCounts( + indices, non_nulls, 0, nulls, options.null_placement); if (options.order == SortOrder::Ascending) { // ones start after zeros @@ -407,8 +397,8 @@ class ArrayCountSorter { int64_t index = offset; VisitRawValuesInline( - *values.data(), [&](bool v) { p.non_nulls_begin[counts[v]++] = index++; }, - [&]() { p.nulls_begin[counts[2]++] = index++; }); + *values.data(), [&](bool v) { p.non_null_like_range[counts[v]++] = index++; }, + [&]() { p.non_null_like_range[counts[2]++] = index++; }); return p; } }; @@ -422,9 +412,10 @@ class ArrayCountOrCompareSorter { using c_type = typename ArrowType::c_type; public: - Result operator()(std::span indices, const Array& array, - int64_t offset, const ArraySortOptions& options, - ExecContext* ctx) { + Result operator()(std::span indices, + const Array& array, int64_t offset, + const ArraySortOptions& options, + ExecContext* ctx) { const auto& values = checked_cast(array); if (values.length() >= countsort_min_len_ && values.length() > values.null_count()) { @@ -462,11 +453,12 @@ class ArrayCountOrCompareSorter { class ArrayNullSorter { public: - Result operator()(std::span indices, const Array& values, - int64_t offset, const ArraySortOptions& options, - ExecContext*) { - return NullPartitionResult::NullsOnly(indices.data(), indices.data() + indices.size(), - options.null_placement); + Result operator()(std::span indices, + const Array& values, int64_t offset, + const ArraySortOptions& options, + ExecContext*) { + return PartitionResultByNullLikeness::fromCounts(indices, 0, 0, indices.size(), + options.null_placement); } }; diff --git a/cpp/src/arrow/compute/kernels/vector_rank.cc b/cpp/src/arrow/compute/kernels/vector_rank.cc index 05f06118e1cc..b95314679eed 100644 --- a/cpp/src/arrow/compute/kernels/vector_rank.cc +++ b/cpp/src/arrow/compute/kernels/vector_rank.cc @@ -37,16 +37,16 @@ namespace { // is the same as the value at the previous sort index. constexpr uint64_t kDuplicateMask = 1ULL << 63; -template -void MarkDuplicates(const NullPartitionResult& sorted, ValueSelector&& value_selector, - IsNullSelector&& is_null_selector) { +template +void MarkDuplicates(const PartitionResultByNullLikeness& sorted, + ValueSelector&& value_selector) { using T = decltype(value_selector(int64_t{})); // Process non-nulls - if (sorted.non_nulls_end != sorted.non_nulls_begin) { - auto it = sorted.non_nulls_begin; + if (!sorted.non_null_like_range.empty()) { + auto it = sorted.non_null_like_range.begin(); T prev_value = value_selector(*it); - while (++it < sorted.non_nulls_end) { + while (++it < sorted.non_null_like_range.end()) { T curr_value = value_selector(*it); if (curr_value == prev_value) { *it |= kDuplicateMask; @@ -55,22 +55,23 @@ void MarkDuplicates(const NullPartitionResult& sorted, ValueSelector&& value_sel } } + // Process nans + if (!sorted.nan_range.empty()) { + for (auto& index : sorted.nan_range.subspan(1)) { + index |= kDuplicateMask; + } + } + // Process nulls - if (sorted.nulls_end != sorted.nulls_begin) { - auto it = sorted.nulls_begin; - bool prev_is_null = is_null_selector(*it); - while (++it < sorted.nulls_end) { - bool curr_is_null = is_null_selector(*it); - if (curr_is_null == prev_is_null) { - *it |= kDuplicateMask; - } - prev_is_null = curr_is_null; + if (!sorted.null_range.empty()) { + for (auto& index : sorted.null_range.subspan(1)) { + index |= kDuplicateMask; } } } template -Result DoSortAndMarkDuplicate( +Result DoSortAndMarkDuplicate( ExecContext* ctx, std::span indices, const Array& input, const std::shared_ptr& physical_type, const SortOrder order, const NullPlacement null_placement, bool needs_duplicates) { @@ -88,24 +89,19 @@ Result DoSortAndMarkDuplicate( auto value_selector = [&array](int64_t index) { return GetView::LogicalValue(array.GetView(index)); }; - if constexpr (has_null_like_values()) { - auto is_null_selector = [&array](int64_t index) { return array.IsNull(index); }; - MarkDuplicates(sorted, value_selector, is_null_selector); - } else { - MarkDuplicates(sorted, value_selector, [](int64_t) { return true; }); - } + MarkDuplicates(sorted, value_selector); } return sorted; } template -Result DoSortAndMarkDuplicate( +Result DoSortAndMarkDuplicate( ExecContext* ctx, std::span indices, const ChunkedArray& input, const std::shared_ptr& physical_type, const SortOrder order, const NullPlacement null_placement, bool needs_duplicates) { auto physical_chunks = GetPhysicalChunks(input, physical_type); if (physical_chunks.empty()) { - return NullPartitionResult{}; + return PartitionResultByNullLikeness::fromCounts(indices, 0, 0, 0, null_placement); } ARROW_ASSIGN_OR_RAISE( auto sorted, SortChunkedArray(ctx, indices, physical_type, physical_chunks, order, @@ -116,15 +112,7 @@ Result DoSortAndMarkDuplicate( ChunkedArrayResolver(std::span(arrays))](int64_t index) { return resolver.Resolve(index).Value(); }; - if constexpr (has_null_like_values()) { - auto is_null_selector = - [resolver = ChunkedArrayResolver(std::span(arrays))](int64_t index) { - return resolver.Resolve(index).IsNull(); - }; - MarkDuplicates(sorted, value_selector, is_null_selector); - } else { - MarkDuplicates(sorted, value_selector, [](int64_t) { return true; }); - } + MarkDuplicates(sorted, value_selector); } return sorted; } @@ -144,7 +132,7 @@ class SortAndMarkDuplicate : public TypeVisitor { needs_duplicates_(needs_duplicate), physical_type_(GetPhysicalType(input.type())) {} - Result Run() { + Result Run() { RETURN_NOT_OK(physical_type_->Accept(this)); return sorted_; } @@ -169,13 +157,14 @@ class SortAndMarkDuplicate : public TypeVisitor { const NullPlacement null_placement_; const bool needs_duplicates_; const std::shared_ptr physical_type_; - NullPartitionResult sorted_{}; + PartitionResultByNullLikeness sorted_{}; }; // A CRTP-based helper class for "rank_normal" and "rank_quantile" template struct BaseQuantileRanker { - Result CreateRankings(ExecContext* ctx, const NullPartitionResult& sorted) { + Result CreateRankings(ExecContext* ctx, + const PartitionResultByNullLikeness& sorted) { const int64_t length = sorted.overall_end() - sorted.overall_begin(); ARROW_ASSIGN_OR_RAISE(auto rankings, MakeMutableFloat64Array(length, ctx->memory_pool())); @@ -225,7 +214,8 @@ struct NormalRanker : public BaseQuantileRanker { struct OrdinalRanker { explicit OrdinalRanker(RankOptions::Tiebreaker tiebreaker) : tiebreaker_(tiebreaker) {} - Result CreateRankings(ExecContext* ctx, const NullPartitionResult& sorted) { + Result CreateRankings(ExecContext* ctx, + const PartitionResultByNullLikeness& sorted) { const int64_t length = sorted.overall_end() - sorted.overall_begin(); ARROW_ASSIGN_OR_RAISE(auto rankings, MakeMutableUInt64Array(length, ctx->memory_pool())); diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc index 599e79845de7..e5f41fad77a3 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_sort.cc @@ -48,7 +48,8 @@ class ChunkedArraySorter : public TypeVisitor { ChunkedArraySorter(ExecContext* ctx, std::span indices, const std::shared_ptr& physical_type, const ArrayVector& physical_chunks, const SortOrder order, - const NullPlacement null_placement, NullPartitionResult* output) + const NullPlacement null_placement, + PartitionResultByNullLikeness* output) : TypeVisitor(), indices_(indices), physical_type_(physical_type), @@ -82,10 +83,9 @@ class ChunkedArraySorter : public TypeVisitor { ArraySortOptions options(order_, null_placement_); const auto num_chunks = static_cast(physical_chunks_.size()); if (num_chunks == 0) { - *output_ = {.non_nulls_begin = indices_.data() + indices_.size(), - .non_nulls_end = indices_.data() + indices_.size(), - .nulls_begin = indices_.data() + indices_.size(), - .nulls_end = indices_.data() + indices_.size()}; + DCHECK_EQ(static_cast(indices_.size()), 0); + *output_ = + PartitionResultByNullLikeness::fromCounts(indices_, 0, 0, 0, null_placement_); return Status::OK(); } const int64_t num_indices = static_cast(indices_.size()); @@ -93,7 +93,7 @@ class ChunkedArraySorter : public TypeVisitor { // Sort each chunk independently and merge to sorted indices. // This is a serial implementation. - std::vector sorted(num_chunks); + std::vector sorted(num_chunks); // First sort all individual chunks int64_t begin_offset = 0; @@ -116,20 +116,11 @@ class ChunkedArraySorter : public TypeVisitor { chunked_mapper.LogicalToPhysical()); auto [chunked_indices_begin, chunked_indices_end] = chunked_indices_pair; - std::vector chunk_sorted(num_chunks); + std::vector chunk_sorted(num_chunks); for (int i = 0; i < num_chunks; ++i) { chunk_sorted[i] = sorted[i].TranslateTo(indices_.data(), chunked_indices_begin); } - auto merge_nulls = [&](CompressedChunkLocation* nulls_begin, - CompressedChunkLocation* nulls_middle, - CompressedChunkLocation* nulls_end, - CompressedChunkLocation* temp_indices, int64_t null_count) { - if (has_null_like_values()) { - PartitionNullsOnly(nulls_begin, nulls_end, arrays, - null_count, null_placement_); - } - }; auto merge_non_nulls = [&](CompressedChunkLocation* range_begin, CompressedChunkLocation* range_middle, CompressedChunkLocation* range_end, CompressedChunkLocation* temp_indices) { @@ -138,8 +129,7 @@ class ChunkedArraySorter : public TypeVisitor { {temp_indices, static_cast(range_end - range_begin)}); }; - ChunkedMergeImpl merge_impl{null_placement_, std::move(merge_nulls), - std::move(merge_non_nulls)}; + ChunkedMergeImpl merge_impl{null_placement_, std::move(merge_non_nulls)}; // std::merge is only called on non-null values, so size temp indices accordingly RETURN_NOT_OK(merge_impl.Init(ctx_, num_indices - null_count)); @@ -168,10 +158,11 @@ class ChunkedArraySorter : public TypeVisitor { } DCHECK_EQ(sorted.size(), 1); - DCHECK_EQ(sorted[0].overall_begin(), indices_.data()); - DCHECK_EQ(sorted[0].overall_end(), indices_.data() + indices_.size()); + DCHECK_EQ(sorted[0].toLegacyNullPartitionResult().overall_begin(), indices_.data()); + DCHECK_EQ(sorted[0].toLegacyNullPartitionResult().overall_end(), + indices_.data() + indices_.size()); // Note that "nulls" can also include NaNs, hence the >= check - DCHECK_GE(sorted[0].null_count(), null_count); + DCHECK_GE(sorted[0].toLegacyNullPartitionResult().null_count(), null_count); *output_ = sorted[0]; return Status::OK(); @@ -222,7 +213,7 @@ class ChunkedArraySorter : public TypeVisitor { const NullPlacement null_placement_; ArraySortFunc array_sorter_; ExecContext* ctx_; - NullPartitionResult* output_; + PartitionResultByNullLikeness* output_; }; // ---------------------------------------------------------------------- @@ -262,7 +253,8 @@ class RecordBatchColumnSorter { : next_column_(next_column) {} virtual ~RecordBatchColumnSorter() {} - virtual NullPartitionResult SortRange(std::span indices, int64_t offset) = 0; + virtual PartitionResultByNullLikeness SortRange(std::span indices, + int64_t offset) = 0; protected: RecordBatchColumnSorter* next_column_; @@ -283,7 +275,8 @@ class ConcreteRecordBatchColumnSorter : public RecordBatchColumnSorter { null_placement_(null_placement), null_count_(array_.null_count()) {} - NullPartitionResult SortRange(std::span indices, int64_t offset) override { + PartitionResultByNullLikeness SortRange(std::span indices, + int64_t offset) override { using GetView = GetViewType; PartitionResultByNullLikeness partitions; @@ -329,7 +322,7 @@ class ConcreteRecordBatchColumnSorter : public RecordBatchColumnSorter { array_, partitions.non_null_like_range, offset, [&](std::span indices) { SortNextColumn(indices, offset); }); } - return partitions.toLegacyNullPartitionResult(); + return partitions; } void SortNextColumn(std::span indices, int64_t offset) { @@ -355,12 +348,12 @@ class ConcreteRecordBatchColumnSorter : public RecordBatchColumnSorter RecordBatchColumnSorter* next_column = nullptr) : RecordBatchColumnSorter(next_column), null_placement_(null_placement) {} - NullPartitionResult SortRange(std::span indices, int64_t offset) { + PartitionResultByNullLikeness SortRange(std::span indices, int64_t offset) { if (next_column_ != nullptr) { next_column_->SortRange(indices, offset); } - return NullPartitionResult::NullsOnly(indices.data(), indices.data() + indices.size(), - null_placement_); + return PartitionResultByNullLikeness::fromCounts(indices, 0, 0, indices.size(), + null_placement_); } protected: @@ -403,7 +396,7 @@ class RadixRecordBatchSorter { indices_(indices) {} // Offset is for table sorting - Result Sort(int64_t offset = 0) { + Result Sort(int64_t offset = 0) { ARROW_RETURN_NOT_OK(status_); // Create column sorters from right to left @@ -659,7 +652,7 @@ class TableSorter { if (num_batches == 0) { return Status::OK(); } - std::vector sorted(num_batches); + std::vector sorted(num_batches); // First sort all individual batches int64_t begin_offset = 0; @@ -674,10 +667,12 @@ class TableSorter { ARROW_ASSIGN_OR_RAISE(sorted[i], sorter.Sort(begin_offset)); DCHECK_EQ(sorted[i].overall_begin(), indices_.data() + begin_offset); DCHECK_EQ(sorted[i].overall_end(), indices_.data() + end_offset); - DCHECK_EQ(sorted[i].non_null_count() + sorted[i].null_count(), batch.num_rows()); + DCHECK_EQ(static_cast(sorted[i].non_null_like_range.size() + + sorted[i].null_range.size()), + batch.num_rows()); begin_offset = end_offset; // XXX this is an upper bound on the true null count - null_count += sorted[i].null_count(); + null_count += sorted[i].null_range.size(); } DCHECK_EQ(end_offset, static_cast(indices_.size())); @@ -688,14 +683,14 @@ class TableSorter { chunked_mapper.LogicalToPhysical()); auto [chunked_indices_begin, chunked_indices_end] = chunked_indices_pair; - std::vector chunk_sorted(num_batches); + std::vector chunk_sorted(num_batches); for (int64_t i = 0; i < num_batches; ++i) { chunk_sorted[i] = sorted[i].TranslateTo(indices_.data(), chunked_indices_begin); } struct Visitor { TableSorter* sorter; - std::vector* chunk_sorted; + std::vector* chunk_sorted; int64_t null_count; #define VISIT(TYPE) \ @@ -726,23 +721,15 @@ class TableSorter { // Recursive merge routine, typed on the first sort key template - Status MergeInternal(std::vector* sorted, + Status MergeInternal(std::vector* sorted, int64_t null_count) { - auto merge_nulls = [&](CompressedChunkLocation* nulls_begin, - CompressedChunkLocation* nulls_middle, - CompressedChunkLocation* nulls_end, - CompressedChunkLocation* temp_indices, int64_t null_count) { - MergeNulls(nulls_begin, nulls_middle, nulls_end, temp_indices, - null_count); - }; auto merge_non_nulls = [&](CompressedChunkLocation* range_begin, CompressedChunkLocation* range_middle, CompressedChunkLocation* range_end, CompressedChunkLocation* temp_indices) { MergeNonNulls(range_begin, range_middle, range_end, temp_indices); }; - ChunkedMergeImpl merge_impl(sort_keys_[0].null_placement, std::move(merge_nulls), - std::move(merge_non_nulls)); + ChunkedMergeImpl merge_impl(sort_keys_[0].null_placement, std::move(merge_non_nulls)); RETURN_NOT_OK(merge_impl.Init(ctx_, table_.num_rows())); while (sorted->size() > 1) { @@ -1133,32 +1120,33 @@ Result> FindSortKeys(const Schema& schema, return SortFieldPopulator{}.FindSortKeys(schema, sort_keys); } -Result SortChunkedArray(ExecContext* ctx, - std::span indices, - const ChunkedArray& chunked_array, - SortOrder sort_order, - NullPlacement null_placement) { +Result SortChunkedArray(ExecContext* ctx, + std::span indices, + const ChunkedArray& chunked_array, + SortOrder sort_order, + NullPlacement null_placement) { auto physical_type = GetPhysicalType(chunked_array.type()); auto physical_chunks = GetPhysicalChunks(chunked_array, physical_type); return SortChunkedArray(ctx, indices, physical_type, physical_chunks, sort_order, null_placement); } -Result SortChunkedArray( +Result SortChunkedArray( ExecContext* ctx, std::span indices, const std::shared_ptr& physical_type, const ArrayVector& physical_chunks, SortOrder sort_order, NullPlacement null_placement) { - NullPartitionResult output; + PartitionResultByNullLikeness output; ChunkedArraySorter sorter(ctx, indices, physical_type, physical_chunks, sort_order, null_placement, &output); RETURN_NOT_OK(sorter.Sort()); return output; } -Result SortStructArray(ExecContext* ctx, std::span indices, - const StructArray& array, - SortOrder sort_order, - NullPlacement null_placement) { +Result SortStructArray(ExecContext* ctx, + std::span indices, + const StructArray& array, + SortOrder sort_order, + NullPlacement null_placement) { ARROW_ASSIGN_OR_RAISE(auto columns, array.Flatten()); auto batch = RecordBatch::Make(schema(array.type()->fields()), array.length(), std::move(columns)); diff --git a/cpp/src/arrow/compute/kernels/vector_sort_internal.h b/cpp/src/arrow/compute/kernels/vector_sort_internal.h index 54df6cef105a..ae84825f931d 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort_internal.h +++ b/cpp/src/arrow/compute/kernels/vector_sort_internal.h @@ -27,9 +27,11 @@ #include "arrow/compute/api_vector.h" #include "arrow/compute/kernel.h" #include "arrow/compute/kernels/chunked_internal.h" +#include "arrow/compute/ordering.h" #include "arrow/table.h" #include "arrow/type.h" #include "arrow/type_traits.h" +#include "arrow/util/logging_internal.h" namespace arrow::compute::internal { @@ -174,15 +176,16 @@ using ChunkedNullPartitionResult = GenericNullPartitionResult struct GenericPartitionResultByNullLikeness { std::span non_null_like_range; - std::span null_range; std::span nan_range; + std::span null_range; IndexType* overall_begin() const { - return std::min(non_null_like_range.begin(), null_range.begin(), nan_range.begin()); + return std::min(non_null_like_range.data(), null_range.data()); } IndexType* overall_end() const { - return std::max(non_null_like_range.end(), null_range.end(), nan_range.end()); + return std::max(non_null_like_range.data() + non_null_like_range.size(), + null_range.data() + null_range.size()); } GenericNullPartitionResult toLegacyNullPartitionResult() const { @@ -200,10 +203,31 @@ struct GenericPartitionResultByNullLikeness { return {.non_null_like_range = {(non_null_like_range.data() - indices_begin) + target_indices_begin, non_null_like_range.size()}, - .null_range = {(null_range.data() - indices_begin) + target_indices_begin, - null_range.size()}, .nan_range = {(nan_range.data() - indices_begin) + target_indices_begin, - nan_range.size()}}; + nan_range.size()}, + .null_range = {(null_range.data() - indices_begin) + target_indices_begin, + null_range.size()}}; + } + + static GenericPartitionResultByNullLikeness fromCounts(std::span indices, + int64_t non_null_like_count, + int64_t nan_count, + int64_t null_count, + NullPlacement null_placement) { + GenericPartitionResultByNullLikeness p; + DCHECK_EQ(non_null_like_count + nan_count + null_count, + static_cast(indices.size())); + if (null_placement == NullPlacement::AtEnd) { + p.non_null_like_range = indices.subspan(0, non_null_like_count); + p.nan_range = indices.subspan(non_null_like_count, nan_count); + p.null_range = indices.subspan(0, null_count); + } else { + p.null_range = indices.subspan(0, null_count); + p.nan_range = indices.subspan(null_count, nan_count); + p.non_null_like_range = + indices.subspan(null_count + nan_count, non_null_like_count); + } + return p; } }; @@ -362,8 +386,8 @@ PartitionResultByNullLikeness PartitionNullsAndNans(std::span indices, p.non_nulls_begin, p.non_nulls_end, values, offset, null_placement); return PartitionResultByNullLikeness{ .non_null_like_range = {q.non_nulls_begin, q.non_nulls_end}, - .null_range = {p.nulls_begin, p.nulls_end}, - .nan_range = {q.nulls_begin, q.nulls_end}}; + .nan_range = {q.nulls_begin, q.nulls_end}, + .null_range = {p.nulls_begin, p.nulls_end}}; } template @@ -377,25 +401,17 @@ PartitionResultByNullLikeness PartitionNansOnly(std::span indices, p.non_nulls_begin, p.non_nulls_end, values, offset, null_placement); return PartitionResultByNullLikeness{ .non_null_like_range = {q.non_nulls_begin, q.non_nulls_end}, - .null_range = {p.nulls_begin, p.nulls_end}, - .nan_range = {q.nulls_begin, q.nulls_end}}; + .nan_range = {q.nulls_begin, q.nulls_end}, + .null_range = {p.nulls_begin, p.nulls_end}}; } struct ChunkedMergeImpl { - using MergeNullsFunc = std::function; - using MergeNonNullsFunc = std::function; - ChunkedMergeImpl(NullPlacement null_placement, MergeNullsFunc&& merge_nulls, - MergeNonNullsFunc&& merge_non_nulls) - : null_placement_(null_placement), - merge_nulls_(std::move(merge_nulls)), - merge_non_nulls_(std::move(merge_non_nulls)) {} + ChunkedMergeImpl(NullPlacement null_placement, MergeNonNullsFunc&& merge_non_nulls) + : null_placement_(null_placement), merge_non_nulls_(std::move(merge_non_nulls)) {} Status Init(ExecContext* ctx, int64_t temp_indices_length) { ARROW_ASSIGN_OR_RAISE(temp_buffer_, AllocateBuffer(sizeof(CompressedChunkLocation) * @@ -406,9 +422,9 @@ struct ChunkedMergeImpl { return Status::OK(); } - ChunkedNullPartitionResult Merge(const ChunkedNullPartitionResult& left, - const ChunkedNullPartitionResult& right, - int64_t null_count) const { + ChunkedPartitionResultByNullLikeness Merge( + const ChunkedPartitionResultByNullLikeness& left, + const ChunkedPartitionResultByNullLikeness& right, int64_t null_count) const { if (null_placement_ == NullPlacement::AtStart) { return MergeNullsAtStart(left, right, null_count); } else { @@ -416,71 +432,86 @@ struct ChunkedMergeImpl { } } - ChunkedNullPartitionResult MergeNullsAtStart(const ChunkedNullPartitionResult& left, - const ChunkedNullPartitionResult& right, - int64_t null_count) const { + ChunkedPartitionResultByNullLikeness MergeNullsAtStart( + const ChunkedPartitionResultByNullLikeness& left, + const ChunkedPartitionResultByNullLikeness& right, int64_t null_count) const { // Input layout: - // [left nulls .... left non-nulls .... right nulls .... right non-nulls] - ARROW_DCHECK_EQ(left.nulls_end, left.non_nulls_begin); - ARROW_DCHECK_EQ(left.non_nulls_end, right.nulls_begin); - ARROW_DCHECK_EQ(right.nulls_end, right.non_nulls_begin); - - // Mutate the input, stably, to obtain the following layout: - // [left nulls .... right nulls .... left non-nulls .... right non-nulls] - std::rotate(left.non_nulls_begin, right.nulls_begin, right.nulls_end); - - const auto p = ChunkedNullPartitionResult::NullsAtStart( - left.nulls_begin, right.non_nulls_end, - left.nulls_begin + left.null_count() + right.null_count()); - - // If the type has null-like values (such as NaN), ensure those plus regular - // nulls are partitioned in the right order. Note this assumes that all - // null-like values (e.g. NaN) are ordered equally. - if (p.null_count()) { - merge_nulls_(p.nulls_begin, p.nulls_begin + left.null_count(), p.nulls_end, - temp_indices_, null_count); - } - - // Merge the non-null values into temp area - ARROW_DCHECK_EQ(right.non_nulls_begin - p.non_nulls_begin, left.non_null_count()); - ARROW_DCHECK_EQ(p.non_nulls_end - right.non_nulls_begin, right.non_null_count()); - if (p.non_null_count()) { - merge_non_nulls_(p.non_nulls_begin, right.non_nulls_begin, p.non_nulls_end, + // [left nulls ... left nans ... left non-nulls ... right nulls ... right nans ... + // right non-nulls] + ARROW_DCHECK_EQ(left.null_range.end(), left.nan_range.begin()); + ARROW_DCHECK_EQ(left.nan_range.end(), left.non_null_like_range.begin()); + ARROW_DCHECK_EQ(left.non_null_like_range.end(), right.null_range.begin()); + ARROW_DCHECK_EQ(right.null_range.end(), right.nan_range.begin()); + ARROW_DCHECK_EQ(right.nan_range.end(), right.non_null_like_range.begin()); + + // Mutate the input, stably in two steps, to obtain the following layouts: + // [left nulls ... left nans ... right nulls ... right nans ... left non-nulls ... + // right non-nulls] + std::rotate(left.non_null_like_range.begin(), right.null_range.begin(), + right.nan_range.end()); + + // only use sizes of ranges that are at a different position now + // [left nulls ... right nulls ... left nans ... right nans ... left non-nulls ... + // right non-nulls] this is a no-op if no nan values are present + std::rotate(left.nan_range.begin(), left.nan_range.begin() + left.nan_range.size(), + left.nan_range.begin() + left.nan_range.size() + right.null_range.size()); + + std::span full_span{left.overall_begin(), + right.overall_end()}; + const auto p = ChunkedPartitionResultByNullLikeness::fromCounts( + full_span, left.non_null_like_range.size() + right.non_null_like_range.size(), + left.nan_range.size() + right.nan_range.size(), + left.null_range.size() + right.null_range.size(), NullPlacement::AtStart); + + if (!p.non_null_like_range.empty()) { + merge_non_nulls_(p.non_null_like_range.data(), + p.non_null_like_range.data() + right.non_null_like_range.size(), + p.non_null_like_range.data() + p.non_null_like_range.size(), temp_indices_); } return p; } - ChunkedNullPartitionResult MergeNullsAtEnd(const ChunkedNullPartitionResult& left, - const ChunkedNullPartitionResult& right, - int64_t null_count) const { + ChunkedPartitionResultByNullLikeness MergeNullsAtEnd( + const ChunkedPartitionResultByNullLikeness& left, + const ChunkedPartitionResultByNullLikeness& right, int64_t null_count) const { // Input layout: - // [left non-nulls .... left nulls .... right non-nulls .... right nulls] - ARROW_DCHECK_EQ(left.non_nulls_end, left.nulls_begin); - ARROW_DCHECK_EQ(left.nulls_end, right.non_nulls_begin); - ARROW_DCHECK_EQ(right.non_nulls_end, right.nulls_begin); - - // Mutate the input, stably, to obtain the following layout: - // [left non-nulls .... right non-nulls .... left nulls .... right nulls] - std::rotate(left.nulls_begin, right.non_nulls_begin, right.non_nulls_end); - - const auto p = ChunkedNullPartitionResult::NullsAtEnd( - left.non_nulls_begin, right.nulls_end, - left.non_nulls_begin + left.non_null_count() + right.non_null_count()); - - // If the type has null-like values (such as NaN), ensure those plus regular - // nulls are partitioned in the right order. Note this assumes that all - // null-like values (e.g. NaN) are ordered equally. - if (p.null_count()) { - merge_nulls_(p.nulls_begin, p.nulls_begin + left.null_count(), p.nulls_end, - temp_indices_, null_count); - } + // [left non-nulls ... left nans ... left nulls ... right non-nulls ... right nans ... + // right nulls] + ARROW_DCHECK_EQ(left.non_null_like_range.end(), left.nan_range.begin()); + ARROW_DCHECK_EQ(left.nan_range.end(), left.null_range.begin()); + ARROW_DCHECK_EQ(left.null_range.end(), right.non_null_like_range.begin()); + ARROW_DCHECK_EQ(right.non_null_like_range.end(), right.nan_range.begin()); + ARROW_DCHECK_EQ(right.nan_range.end(), right.null_range.begin()); + + // Mutate the input, stably in two steps, to obtain the following layouts: + // [left non-nulls ... right non-nulls ... left nans ... left nulls ... right nans ... + // right nulls] + std::rotate(left.nan_range.begin(), right.non_null_like_range.begin(), + right.non_null_like_range.end()); + + // only use sizes of ranges that are at a different position now + // [left non-nulls ... right non-nulls ... left nans ... left nulls ... right nans ... + // right nulls] this is a no-op if no nan values are present + auto new_left_null_range_begin = + left.non_null_like_range.begin() + left.non_null_like_range.size() + + right.non_null_like_range.size() + left.nan_range.size(); + std::rotate( + new_left_null_range_begin, new_left_null_range_begin + left.null_range.size(), + new_left_null_range_begin + left.null_range.size() + right.nan_range.size()); + + std::span full_span{left.overall_begin(), + right.overall_end()}; + const auto p = ChunkedPartitionResultByNullLikeness::fromCounts( + full_span, left.non_null_like_range.size() + right.non_null_like_range.size(), + left.nan_range.size() + right.nan_range.size(), + left.null_range.size() + right.null_range.size(), NullPlacement::AtEnd); // Merge the non-null values into temp area - ARROW_DCHECK_EQ(left.non_nulls_end - p.non_nulls_begin, left.non_null_count()); - ARROW_DCHECK_EQ(p.non_nulls_end - left.non_nulls_end, right.non_null_count()); - if (p.non_null_count()) { - merge_non_nulls_(p.non_nulls_begin, left.non_nulls_end, p.non_nulls_end, + if (!p.non_null_like_range.empty()) { + merge_non_nulls_(p.non_null_like_range.data(), + p.non_null_like_range.data() + left.non_null_like_range.size(), + p.non_null_like_range.data() + p.non_null_like_range.size(), temp_indices_); } return p; @@ -488,7 +519,6 @@ struct ChunkedMergeImpl { private: NullPlacement null_placement_; - MergeNullsFunc merge_nulls_; MergeNonNullsFunc merge_non_nulls_; std::unique_ptr temp_buffer_; CompressedChunkLocation* temp_indices_ = nullptr; @@ -497,27 +527,28 @@ struct ChunkedMergeImpl { // TODO make this usable if indices are non trivial on input // (see ConcreteRecordBatchColumnSorter) // `offset` is used when this is called on a chunk of a chunked array -using ArraySortFunc = std::function( +using ArraySortFunc = std::function( std::span indices, const Array& values, int64_t offset, const ArraySortOptions& options, ExecContext* ctx)>; Result GetArraySorter(const DataType& type); -Result SortChunkedArray(ExecContext* ctx, - std::span indices, - const ChunkedArray& chunked_array, - SortOrder sort_order, - NullPlacement null_placement); +Result SortChunkedArray(ExecContext* ctx, + std::span indices, + const ChunkedArray& chunked_array, + SortOrder sort_order, + NullPlacement null_placement); -Result SortChunkedArray( +Result SortChunkedArray( ExecContext* ctx, std::span indices, const std::shared_ptr& physical_type, const ArrayVector& physical_chunks, SortOrder sort_order, NullPlacement null_placement); -Result SortStructArray(ExecContext* ctx, std::span indices, - const StructArray& array, - SortOrder sort_order, - NullPlacement null_placement); +Result SortStructArray(ExecContext* ctx, + std::span indices, + const StructArray& array, + SortOrder sort_order, + NullPlacement null_placement); // ---------------------------------------------------------------------- // Helpers for Sort/SelectK/Rank implementations From 49ac605a055d044d2652c941f54913451b6ffcf2 Mon Sep 17 00:00:00 2001 From: Alexander Taepper Date: Thu, 25 Jun 2026 01:03:14 +0200 Subject: [PATCH 10/12] make comments not clip into new line --- .../compute/kernels/vector_sort_internal.h | 22 ++++++++----------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_sort_internal.h b/cpp/src/arrow/compute/kernels/vector_sort_internal.h index ae84825f931d..bd50ecddba59 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort_internal.h +++ b/cpp/src/arrow/compute/kernels/vector_sort_internal.h @@ -436,8 +436,7 @@ struct ChunkedMergeImpl { const ChunkedPartitionResultByNullLikeness& left, const ChunkedPartitionResultByNullLikeness& right, int64_t null_count) const { // Input layout: - // [left nulls ... left nans ... left non-nulls ... right nulls ... right nans ... - // right non-nulls] + // [left nul .. left nan .. left non-nul .. right nul .. right nan .. right non-nul] ARROW_DCHECK_EQ(left.null_range.end(), left.nan_range.begin()); ARROW_DCHECK_EQ(left.nan_range.end(), left.non_null_like_range.begin()); ARROW_DCHECK_EQ(left.non_null_like_range.end(), right.null_range.begin()); @@ -445,14 +444,13 @@ struct ChunkedMergeImpl { ARROW_DCHECK_EQ(right.nan_range.end(), right.non_null_like_range.begin()); // Mutate the input, stably in two steps, to obtain the following layouts: - // [left nulls ... left nans ... right nulls ... right nans ... left non-nulls ... - // right non-nulls] + // [left nul .. left nan .. right nul .. right nan .. left non-nul .. right non-nus] std::rotate(left.non_null_like_range.begin(), right.null_range.begin(), right.nan_range.end()); // only use sizes of ranges that are at a different position now - // [left nulls ... right nulls ... left nans ... right nans ... left non-nulls ... - // right non-nulls] this is a no-op if no nan values are present + // [left nul .. right nul .. left nan .. right nan .. left non-nulls .. right + // non-nulls] this is a no-op if no nan values are present std::rotate(left.nan_range.begin(), left.nan_range.begin() + left.nan_range.size(), left.nan_range.begin() + left.nan_range.size() + right.null_range.size()); @@ -465,7 +463,7 @@ struct ChunkedMergeImpl { if (!p.non_null_like_range.empty()) { merge_non_nulls_(p.non_null_like_range.data(), - p.non_null_like_range.data() + right.non_null_like_range.size(), + p.non_null_like_range.data() + left.non_null_like_range.size(), p.non_null_like_range.data() + p.non_null_like_range.size(), temp_indices_); } @@ -476,8 +474,7 @@ struct ChunkedMergeImpl { const ChunkedPartitionResultByNullLikeness& left, const ChunkedPartitionResultByNullLikeness& right, int64_t null_count) const { // Input layout: - // [left non-nulls ... left nans ... left nulls ... right non-nulls ... right nans ... - // right nulls] + // [left non-nul .. left nan .. left nul .. right non-nul .. right nan .. right nulls] ARROW_DCHECK_EQ(left.non_null_like_range.end(), left.nan_range.begin()); ARROW_DCHECK_EQ(left.nan_range.end(), left.null_range.begin()); ARROW_DCHECK_EQ(left.null_range.end(), right.non_null_like_range.begin()); @@ -485,14 +482,13 @@ struct ChunkedMergeImpl { ARROW_DCHECK_EQ(right.nan_range.end(), right.null_range.begin()); // Mutate the input, stably in two steps, to obtain the following layouts: - // [left non-nulls ... right non-nulls ... left nans ... left nulls ... right nans ... - // right nulls] + // [left non-nul .. right non-nul .. left nan .. left nul .. right nan .. right nul] std::rotate(left.nan_range.begin(), right.non_null_like_range.begin(), right.non_null_like_range.end()); // only use sizes of ranges that are at a different position now - // [left non-nulls ... right non-nulls ... left nans ... left nulls ... right nans ... - // right nulls] this is a no-op if no nan values are present + // [left non-nul .. right non-nul .. left nan .. left nul .. right nan .. right nul] + // this is a no-op if no nan values are present auto new_left_null_range_begin = left.non_null_like_range.begin() + left.non_null_like_range.size() + right.non_null_like_range.size() + left.nan_range.size(); From a6d29421bcb7f63d83748f20d5c80ab00d8b755d Mon Sep 17 00:00:00 2001 From: Alexander Taepper Date: Thu, 25 Jun 2026 01:58:13 +0200 Subject: [PATCH 11/12] bugfix --- cpp/src/arrow/compute/kernels/vector_array_sort.cc | 2 +- cpp/src/arrow/compute/kernels/vector_sort_internal.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_array_sort.cc b/cpp/src/arrow/compute/kernels/vector_array_sort.cc index 1353018e812d..c462a0eecc48 100644 --- a/cpp/src/arrow/compute/kernels/vector_array_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_array_sort.cc @@ -398,7 +398,7 @@ class ArrayCountSorter { int64_t index = offset; VisitRawValuesInline( *values.data(), [&](bool v) { p.non_null_like_range[counts[v]++] = index++; }, - [&]() { p.non_null_like_range[counts[2]++] = index++; }); + [&]() { p.null_range[counts[2]++] = index++; }); return p; } }; diff --git a/cpp/src/arrow/compute/kernels/vector_sort_internal.h b/cpp/src/arrow/compute/kernels/vector_sort_internal.h index bd50ecddba59..6ca41708f5ae 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort_internal.h +++ b/cpp/src/arrow/compute/kernels/vector_sort_internal.h @@ -220,7 +220,7 @@ struct GenericPartitionResultByNullLikeness { if (null_placement == NullPlacement::AtEnd) { p.non_null_like_range = indices.subspan(0, non_null_like_count); p.nan_range = indices.subspan(non_null_like_count, nan_count); - p.null_range = indices.subspan(0, null_count); + p.null_range = indices.subspan(non_null_like_count + nan_count, null_count); } else { p.null_range = indices.subspan(0, null_count); p.nan_range = indices.subspan(null_count, nan_count); From 54c3aca1e2f1b4225497b43a968044fec0ff63e5 Mon Sep 17 00:00:00 2001 From: Alexander Taepper Date: Thu, 25 Jun 2026 02:09:25 +0200 Subject: [PATCH 12/12] consolidate more functions --- cpp/src/arrow/compute/kernels/vector_sort.cc | 75 +++-- .../compute/kernels/vector_sort_internal.h | 288 +++++------------- 2 files changed, 114 insertions(+), 249 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc index e5f41fad77a3..e7190ff84b42 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_sort.cc @@ -158,11 +158,10 @@ class ChunkedArraySorter : public TypeVisitor { } DCHECK_EQ(sorted.size(), 1); - DCHECK_EQ(sorted[0].toLegacyNullPartitionResult().overall_begin(), indices_.data()); - DCHECK_EQ(sorted[0].toLegacyNullPartitionResult().overall_end(), - indices_.data() + indices_.size()); + DCHECK_EQ(sorted[0].overall_begin(), indices_.data()); + DCHECK_EQ(sorted[0].overall_end(), indices_.data() + indices_.size()); // Note that "nulls" can also include NaNs, hence the >= check - DCHECK_GE(sorted[0].toLegacyNullPartitionResult().null_count(), null_count); + DCHECK_GE(static_cast(sorted[0].null_range.size()), null_count); *output_ = sorted[0]; return Status::OK(); @@ -523,25 +522,24 @@ class MultipleKeyRecordBatchSorter : public TypeVisitor { const auto p = PartitionNullsInternal(first_sort_key); // Sort first-key non-nulls - std::stable_sort( - p.non_nulls_begin, p.non_nulls_end, [&](uint64_t left, uint64_t right) { - // Both values are never null nor NaN - // (otherwise they've been partitioned away above). - const auto value_left = GetView::LogicalValue(array.GetView(left)); - const auto value_right = GetView::LogicalValue(array.GetView(right)); - if (value_left != value_right) { - bool compared = value_left < value_right; - if (first_sort_key.order == SortOrder::Ascending) { - return compared; - } else { - return !compared; - } - } - // If the left value equals to the right value, - // we need to compare the second and following - // sort keys. - return comparator.Compare(left, right, 1); - }); + std::ranges::stable_sort(p.non_null_like_range, [&](uint64_t left, uint64_t right) { + // Both values are never null nor NaN + // (otherwise they've been partitioned away above). + const auto value_left = GetView::LogicalValue(array.GetView(left)); + const auto value_right = GetView::LogicalValue(array.GetView(right)); + if (value_left != value_right) { + bool compared = value_left < value_right; + if (first_sort_key.order == SortOrder::Ascending) { + return compared; + } else { + return !compared; + } + } + // If the left value equals to the right value, + // we need to compare the second and following + // sort keys. + return comparator.Compare(left, right, 1); + }); return comparator_.status(); } @@ -555,37 +553,34 @@ class MultipleKeyRecordBatchSorter : public TypeVisitor { // Behaves like PartitionNulls() but this supports multiple sort keys. template - NullPartitionResult PartitionNullsInternal(const ResolvedSortKey& first_sort_key) { + PartitionResultByNullLikeness PartitionNullsInternal( + const ResolvedSortKey& first_sort_key) { using ArrayType = typename TypeTraits::ArrayType; const ArrayType& array = ::arrow::internal::checked_cast(first_sort_key.array); - const auto p = PartitionNullsOnly( - indices_.data(), indices_.data() + indices_.size(), array, 0, - first_sort_key.null_placement); - const auto q = PartitionNullLikes( - p.non_nulls_begin, p.non_nulls_end, array, 0, first_sort_key.null_placement); + const auto p = PartitionNullsAndNans( + indices_, array, 0, first_sort_key.null_placement); auto& comparator = comparator_; - if (q.nulls_begin != q.nulls_end) { + if (!p.nan_range.empty()) { // Sort all NaNs by the second and following sort keys. // TODO: could we instead run an independent sort from the second key on // this slice? - std::stable_sort(q.nulls_begin, q.nulls_end, - [&comparator](uint64_t left, uint64_t right) { - return comparator.Compare(left, right, 1); - }); + std::ranges::stable_sort(p.nan_range, [&comparator](uint64_t left, uint64_t right) { + return comparator.Compare(left, right, 1); + }); } - if (p.nulls_begin != p.nulls_end) { + if (!p.null_range.empty()) { // Sort all nulls by the second and following sort keys. // TODO: could we instead run an independent sort from the second key on // this slice? - std::stable_sort(p.nulls_begin, p.nulls_end, - [&comparator](uint64_t left, uint64_t right) { - return comparator.Compare(left, right, 1); - }); + std::ranges::stable_sort(p.null_range, + [&comparator](uint64_t left, uint64_t right) { + return comparator.Compare(left, right, 1); + }); } - return q; + return p; } std::span indices_; diff --git a/cpp/src/arrow/compute/kernels/vector_sort_internal.h b/cpp/src/arrow/compute/kernels/vector_sort_internal.h index 6ca41708f5ae..8684cc71b2a1 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort_internal.h +++ b/cpp/src/arrow/compute/kernels/vector_sort_internal.h @@ -57,19 +57,16 @@ namespace arrow::compute::internal { // NOTE: std::partition is usually faster than std::stable_partition. struct NonStablePartitioner { - template - IndexType* operator()(IndexType* indices_begin, IndexType* indices_end, - Predicate&& pred) { - return std::partition(indices_begin, indices_end, std::forward(pred)); + template + auto operator()(std::span indices, Predicate&& pred) { + return std::ranges::partition(indices, std::forward(pred)); } }; struct StablePartitioner { - template - IndexType* operator()(IndexType* indices_begin, IndexType* indices_end, - Predicate&& pred) { - return std::stable_partition(indices_begin, indices_end, - std::forward(pred)); + template + auto operator()(std::span indices, Predicate&& pred) { + return std::ranges::stable_partition(indices, std::forward(pred)); } }; @@ -108,71 +105,6 @@ int CompareTypeValues(Value&& left, Value&& right, SortOrder order, return compared; } -template -struct GenericNullPartitionResult { - IndexType* non_nulls_begin; - IndexType* non_nulls_end; - IndexType* nulls_begin; - IndexType* nulls_end; - - IndexType* overall_begin() const { return std::min(nulls_begin, non_nulls_begin); } - - IndexType* overall_end() const { return std::max(nulls_end, non_nulls_end); } - - int64_t non_null_count() const { return non_nulls_end - non_nulls_begin; } - - int64_t null_count() const { return nulls_end - nulls_begin; } - - static GenericNullPartitionResult NoNulls(IndexType* indices_begin, - IndexType* indices_end, - NullPlacement null_placement) { - if (null_placement == NullPlacement::AtStart) { - return {indices_begin, indices_end, indices_begin, indices_begin}; - } else { - return {indices_begin, indices_end, indices_end, indices_end}; - } - } - - static GenericNullPartitionResult NullsOnly(IndexType* indices_begin, - IndexType* indices_end, - NullPlacement null_placement) { - if (null_placement == NullPlacement::AtStart) { - return {indices_end, indices_end, indices_begin, indices_end}; - } else { - return {indices_begin, indices_begin, indices_begin, indices_end}; - } - } - - static GenericNullPartitionResult NullsAtEnd(IndexType* indices_begin, - IndexType* indices_end, - IndexType* midpoint) { - ARROW_DCHECK_GE(midpoint, indices_begin); - ARROW_DCHECK_LE(midpoint, indices_end); - return {indices_begin, midpoint, midpoint, indices_end}; - } - - static GenericNullPartitionResult NullsAtStart(IndexType* indices_begin, - IndexType* indices_end, - IndexType* midpoint) { - ARROW_DCHECK_GE(midpoint, indices_begin); - ARROW_DCHECK_LE(midpoint, indices_end); - return {midpoint, indices_end, indices_begin, midpoint}; - } - - template - GenericNullPartitionResult TranslateTo( - IndexType* indices_begin, TargetIndexType* target_indices_begin) const { - return { - (non_nulls_begin - indices_begin) + target_indices_begin, - (non_nulls_end - indices_begin) + target_indices_begin, - (nulls_begin - indices_begin) + target_indices_begin, - (nulls_end - indices_begin) + target_indices_begin, - }; - } -}; -using NullPartitionResult = GenericNullPartitionResult; -using ChunkedNullPartitionResult = GenericNullPartitionResult; - template struct GenericPartitionResultByNullLikeness { std::span non_null_like_range; @@ -188,15 +120,6 @@ struct GenericPartitionResultByNullLikeness { null_range.data() + null_range.size()); } - GenericNullPartitionResult toLegacyNullPartitionResult() const { - return GenericNullPartitionResult{ - non_null_like_range.data(), - non_null_like_range.data() + non_null_like_range.size(), - std::min(null_range.data(), nan_range.data()), - std::max(null_range.data() + null_range.size(), - nan_range.data() + nan_range.size())}; - } - template GenericPartitionResultByNullLikeness TranslateTo( IndexType* indices_begin, TargetIndexType* target_indices_begin) const { @@ -235,142 +158,92 @@ using PartitionResultByNullLikeness = GenericPartitionResultByNullLikeness; -// Move nulls (not null-like values) to end of array. -// -// `offset` is used when this is called on a chunk of a chunked array -template -NullPartitionResult PartitionNullsOnly(uint64_t* indices_begin, uint64_t* indices_end, - const Array& values, int64_t offset, - NullPlacement null_placement) { - if (values.null_count() == 0) { - return NullPartitionResult::NoNulls(indices_begin, indices_end, null_placement); - } - Partitioner partitioner; - if (null_placement == NullPlacement::AtStart) { - auto nulls_end = partitioner( - indices_begin, indices_end, - [&values, &offset](uint64_t ind) { return values.IsNull(ind - offset); }); - return NullPartitionResult::NullsAtStart(indices_begin, indices_end, nulls_end); - } else { - auto nulls_begin = partitioner( - indices_begin, indices_end, - [&values, &offset](uint64_t ind) { return !values.IsNull(ind - offset); }); - return NullPartitionResult::NullsAtEnd(indices_begin, indices_end, nulls_begin); - } -} +struct NullPartition { + std::span non_nulls; + std::span nulls; -// Move non-null null-like values to end of array. -// -// `offset` is used when this is called on a chunk of a chunked array -template -NullPartitionResult PartitionNullLikes(uint64_t* indices_begin, uint64_t* indices_end, - const ArrayType& values, int64_t offset, - NullPlacement null_placement) { - if constexpr (has_null_like_values()) { - Partitioner partitioner; + static NullPartition NoNulls(std::span indices, + NullPlacement null_placement) { if (null_placement == NullPlacement::AtStart) { - auto null_likes_end = - partitioner(indices_begin, indices_end, [&values, &offset](uint64_t ind) { - return std::isnan(values.GetView(ind - offset)); - }); - return NullPartitionResult::NullsAtStart(indices_begin, indices_end, - null_likes_end); + return {.non_nulls = indices, .nulls = indices.subspan(0, 0)}; } else { - auto null_likes_begin = - partitioner(indices_begin, indices_end, [&values, &offset](uint64_t ind) { - return !std::isnan(values.GetView(ind - offset)); - }); - return NullPartitionResult::NullsAtEnd(indices_begin, indices_end, - null_likes_begin); + return {.non_nulls = indices, .nulls = indices.subspan(indices.size(), 0)}; } - } else { - return NullPartitionResult::NoNulls(indices_begin, indices_end, null_placement); } -} -// -// Null partitioning on chunked arrays, in two flavors: -// 1) with uint64_t indices and ChunkedArrayResolver -// 2) with CompressedChunkLocation and span of chunks -// + static NullPartition NullsAtEnd(std::span indices, + std::span null_tail) { + ARROW_DCHECK_GE(null_tail.begin(), indices.begin()); + ARROW_DCHECK_LE(null_tail.begin(), indices.end()); + return {.non_nulls = {indices.begin(), null_tail.begin()}, .nulls = null_tail}; + } + static NullPartition NullsAtStart(std::span indices, + std::span non_null_tail) { + ARROW_DCHECK_GE(non_null_tail.begin(), indices.begin()); + ARROW_DCHECK_LE(non_null_tail.begin(), indices.end()); + return {.non_nulls = non_null_tail, + .nulls = {indices.begin(), non_null_tail.begin()}}; + } +}; + +// Move nulls (not null-like values) to end of array. +// +// `offset` is used when this is called on a chunk of a chunked array template -NullPartitionResult PartitionNullsOnly(uint64_t* indices_begin, uint64_t* indices_end, - const ChunkedArrayResolver& resolver, - int64_t null_count, NullPlacement null_placement) { - if (null_count == 0) { - return NullPartitionResult::NoNulls(indices_begin, indices_end, null_placement); +NullPartition PartitionNullsOnly(std::span indices, const Array& values, + int64_t offset, NullPlacement null_placement) { + if (values.null_count() == 0) { + return NullPartition::NoNulls(indices, null_placement); } Partitioner partitioner; if (null_placement == NullPlacement::AtStart) { - auto nulls_end = partitioner(indices_begin, indices_end, [&](uint64_t ind) { - const auto chunk = resolver.Resolve(ind); - return chunk.IsNull(); + auto non_null_tail = partitioner(indices, [&values, &offset](uint64_t ind) { + return values.IsNull(ind - offset); }); - return NullPartitionResult::NullsAtStart(indices_begin, indices_end, nulls_end); + return NullPartition::NullsAtStart(indices, non_null_tail); } else { - auto nulls_begin = partitioner(indices_begin, indices_end, [&](uint64_t ind) { - const auto chunk = resolver.Resolve(ind); - return !chunk.IsNull(); + auto null_tail = partitioner(indices, [&values, &offset](uint64_t ind) { + return !values.IsNull(ind - offset); }); - return NullPartitionResult::NullsAtEnd(indices_begin, indices_end, nulls_begin); + return NullPartition::NullsAtEnd(indices, null_tail); } } -template -ChunkedNullPartitionResult PartitionNullsOnly(CompressedChunkLocation* locations_begin, - CompressedChunkLocation* locations_end, - std::span chunks, - int64_t null_count, - NullPlacement null_placement) { - if (null_count == 0) { - return ChunkedNullPartitionResult::NoNulls(locations_begin, locations_end, - null_placement); - } - Partitioner partitioner; - if (null_placement == NullPlacement::AtStart) { - auto nulls_end = - partitioner(locations_begin, locations_end, [&](CompressedChunkLocation loc) { - return chunks[loc.chunk_index()]->IsNull( - static_cast(loc.index_in_chunk())); - }); - return ChunkedNullPartitionResult::NullsAtStart(locations_begin, locations_end, - nulls_end); - } else { - auto nulls_begin = - partitioner(locations_begin, locations_end, [&](CompressedChunkLocation loc) { - return !chunks[loc.chunk_index()]->IsNull( - static_cast(loc.index_in_chunk())); - }); - return ChunkedNullPartitionResult::NullsAtEnd(locations_begin, locations_end, - nulls_begin); - } -} +struct NanPartition { + std::span non_null_like_range; + std::span nan_range; +}; -template -NullPartitionResult PartitionNullLikes(uint64_t* indices_begin, uint64_t* indices_end, - const ChunkedArrayResolver& resolver, - NullPlacement null_placement) { +// Move non-null null-like values to end of array. +// +// `offset` is used when this is called on a chunk of a chunked array +template +NanPartition PartitionNans(std::span indices, const ArrayType& values, + int64_t offset, NullPlacement null_placement) { if constexpr (has_null_like_values()) { Partitioner partitioner; if (null_placement == NullPlacement::AtStart) { - auto null_likes_end = partitioner(indices_begin, indices_end, [&](uint64_t ind) { - const auto chunk = resolver.Resolve(ind); - return std::isnan(chunk.Value()); + auto non_null_like_tail = partitioner(indices, [&values, &offset](uint64_t ind) { + return std::isnan(values.GetView(ind - offset)); }); - return NullPartitionResult::NullsAtStart(indices_begin, indices_end, - null_likes_end); + return NanPartition{.non_null_like_range = non_null_like_tail, + .nan_range = {indices.data(), non_null_like_tail.data()}}; } else { - auto null_likes_begin = partitioner(indices_begin, indices_end, [&](uint64_t ind) { - const auto chunk = resolver.Resolve(ind); - return !std::isnan(chunk.Value()); + auto nan_tail = partitioner(indices, [&values, &offset](uint64_t ind) { + return !std::isnan(values.GetView(ind - offset)); }); - return NullPartitionResult::NullsAtEnd(indices_begin, indices_end, - null_likes_begin); + return NanPartition{.non_null_like_range = {indices.data(), nan_tail.data()}, + .nan_range = nan_tail}; } } else { - return NullPartitionResult::NoNulls(indices_begin, indices_end, null_placement); + if (null_placement == NullPlacement::AtStart) { + return NanPartition{.non_null_like_range = indices, + .nan_range = {indices.data(), indices.data()}}; + } else { + return NanPartition{.non_null_like_range = indices, + .nan_range = indices.subspan(indices.size())}; + } } } @@ -380,14 +253,13 @@ PartitionResultByNullLikeness PartitionNullsAndNans(std::span indices, int64_t offset, NullPlacement null_placement) { // Partition nulls at start (resp. end), and null-like values just before (resp. after) - NullPartitionResult p = PartitionNullsOnly( - indices.data(), indices.data() + indices.size(), values, offset, null_placement); - NullPartitionResult q = PartitionNullLikes( - p.non_nulls_begin, p.non_nulls_end, values, offset, null_placement); - return PartitionResultByNullLikeness{ - .non_null_like_range = {q.non_nulls_begin, q.non_nulls_end}, - .nan_range = {q.nulls_begin, q.nulls_end}, - .null_range = {p.nulls_begin, p.nulls_end}}; + NullPartition p = + PartitionNullsOnly(indices, values, offset, null_placement); + auto q = + PartitionNans(p.non_nulls, values, offset, null_placement); + return PartitionResultByNullLikeness{.non_null_like_range = q.non_null_like_range, + .nan_range = q.nan_range, + .null_range = p.nulls}; } template @@ -395,14 +267,12 @@ PartitionResultByNullLikeness PartitionNansOnly(std::span indices, const ArrayType& values, int64_t offset, NullPlacement null_placement) { // Partition nulls at start (resp. end), and null-like values just before (resp. after) - NullPartitionResult p = NullPartitionResult::NoNulls( - indices.data(), indices.data() + indices.size(), null_placement); - NullPartitionResult q = PartitionNullLikes( - p.non_nulls_begin, p.non_nulls_end, values, offset, null_placement); - return PartitionResultByNullLikeness{ - .non_null_like_range = {q.non_nulls_begin, q.non_nulls_end}, - .nan_range = {q.nulls_begin, q.nulls_end}, - .null_range = {p.nulls_begin, p.nulls_end}}; + NullPartition p = NullPartition::NoNulls(indices, null_placement); + auto q = + PartitionNans(p.non_nulls, values, offset, null_placement); + return PartitionResultByNullLikeness{.non_null_like_range = q.non_null_like_range, + .nan_range = q.nan_range, + .null_range = p.nulls}; } struct ChunkedMergeImpl {