diff --git a/cpp/src/arrow/compute/kernels/chunked_internal.cc b/cpp/src/arrow/compute/kernels/chunked_internal.cc index 39495ea0e68..38a37ca4eff 100644 --- a/cpp/src/arrow/compute/kernels/chunked_internal.cc +++ b/cpp/src/arrow/compute/kernels/chunked_internal.cc @@ -67,13 +67,13 @@ ChunkedIndexMapper::LogicalToPhysical() { } } - const int64_t num_indices = static_cast(indices_end_ - indices_begin_); + const int64_t num_indices = static_cast(indices_.size()); DCHECK_EQ(num_indices, std::accumulate(chunk_lengths_.begin(), chunk_lengths_.end(), static_cast(0))); CompressedChunkLocation* physical_begin = - reinterpret_cast(indices_begin_); - DCHECK_EQ(physical_begin + num_indices, - reinterpret_cast(indices_end_)); + reinterpret_cast(indices_.data()); + DCHECK_EQ(physical_begin + num_indices, reinterpret_cast( + indices_.data() + indices_.size())); int64_t chunk_offset = 0; for (int64_t chunk_index = 0; chunk_index < static_cast(chunk_lengths_.size()); @@ -82,12 +82,12 @@ ChunkedIndexMapper::LogicalToPhysical() { for (int64_t i = 0; i < chunk_length; ++i) { // Logical indices are expected to be chunk-partitioned, which avoids costly // chunked index resolution. 
- DCHECK_GE(indices_begin_[chunk_offset + i], static_cast(chunk_offset)); - DCHECK_LT(indices_begin_[chunk_offset + i], + DCHECK_GE(indices_[chunk_offset + i], static_cast(chunk_offset)); + DCHECK_LT(indices_[chunk_offset + i], static_cast(chunk_offset + chunk_length)); physical_begin[chunk_offset + i] = CompressedChunkLocation{ static_cast(chunk_index), - indices_begin_[chunk_offset + i] - static_cast(chunk_offset)}; + indices_[chunk_offset + i] - static_cast(chunk_offset)}; } chunk_offset += chunk_length; } @@ -105,15 +105,15 @@ Status ChunkedIndexMapper::PhysicalToLogical() { } } - const int64_t num_indices = static_cast(indices_end_ - indices_begin_); + const int64_t num_indices = static_cast(indices_.size()); CompressedChunkLocation* physical_begin = - reinterpret_cast(indices_begin_); + reinterpret_cast(indices_.data()); for (int64_t i = 0; i < num_indices; ++i) { const auto loc = physical_begin[i]; DCHECK_LT(loc.chunk_index(), chunk_offsets.size()); DCHECK_LT(loc.index_in_chunk(), static_cast(chunk_lengths_[loc.chunk_index()])); - indices_begin_[i] = + indices_[i] = chunk_offsets[loc.chunk_index()] + static_cast(loc.index_in_chunk()); } diff --git a/cpp/src/arrow/compute/kernels/chunked_internal.h b/cpp/src/arrow/compute/kernels/chunked_internal.h index 2dcfa0047ea..ed02070330f 100644 --- a/cpp/src/arrow/compute/kernels/chunked_internal.h +++ b/cpp/src/arrow/compute/kernels/chunked_internal.h @@ -127,19 +127,12 @@ ARROW_EXPORT std::vector GetArrayPointers(const ArrayVector& array // and vice-versa. 
class ARROW_EXPORT ChunkedIndexMapper { public: - ChunkedIndexMapper(const std::vector& chunks, uint64_t* indices_begin, - uint64_t* indices_end) - : ChunkedIndexMapper(std::span(chunks), indices_begin, indices_end) {} - ChunkedIndexMapper(std::span chunks, uint64_t* indices_begin, - uint64_t* indices_end) - : chunk_lengths_(GetChunkLengths(chunks)), - indices_begin_(indices_begin), - indices_end_(indices_end) {} - ChunkedIndexMapper(const RecordBatchVector& chunks, uint64_t* indices_begin, - uint64_t* indices_end) - : chunk_lengths_(GetChunkLengths(chunks)), - indices_begin_(indices_begin), - indices_end_(indices_end) {} + ChunkedIndexMapper(const std::vector& chunks, std::span indices) + : ChunkedIndexMapper(std::span(chunks), indices) {} + ChunkedIndexMapper(std::span chunks, std::span indices) + : chunk_lengths_(GetChunkLengths(chunks)), indices_(indices) {} + ChunkedIndexMapper(const RecordBatchVector& chunks, std::span indices) + : chunk_lengths_(GetChunkLengths(chunks)), indices_(indices) {} // Turn the original uint64_t logical indices into physical. 
This reuses the // same memory area, so the logical indices cannot be used anymore until @@ -158,8 +151,7 @@ class ARROW_EXPORT ChunkedIndexMapper { static std::vector GetChunkLengths(const RecordBatchVector& chunks); std::vector chunk_lengths_; - uint64_t* indices_begin_; - uint64_t* indices_end_; + std::span indices_; }; } // namespace arrow::compute::internal diff --git a/cpp/src/arrow/compute/kernels/vector_array_sort.cc b/cpp/src/arrow/compute/kernels/vector_array_sort.cc index 6e7068f6ecf..c462a0eecc4 100644 --- a/cpp/src/arrow/compute/kernels/vector_array_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_array_sort.cc @@ -71,22 +71,24 @@ struct PartitionNthToIndices { return Status::IndexError("NthToIndices index out of bound"); } ArrayData* out_arr = out->array_data().get(); - uint64_t* out_begin = out_arr->GetMutableValues(1); - uint64_t* out_end = out_begin + arr.length(); - std::iota(out_begin, out_end, 0); + std::span out_span{out_arr->GetMutableValues(1), + static_cast(arr.length())}; + std::iota(out_span.begin(), out_span.end(), 0); if (pivot == arr.length()) { return Status::OK(); } - const auto p = PartitionNulls( - out_begin, out_end, arr, 0, options.null_placement); - auto nth_begin = out_begin + pivot; - if (nth_begin >= p.non_nulls_begin && nth_begin < p.non_nulls_end) { - std::nth_element(p.non_nulls_begin, nth_begin, p.non_nulls_end, - [&arr](uint64_t left, uint64_t right) { - const auto lval = GetView::LogicalValue(arr.GetView(left)); - const auto rval = GetView::LogicalValue(arr.GetView(right)); - return lval < rval; - }); + const auto p = PartitionNullsAndNans( + out_span, arr, 0, options.null_placement); + auto nth_begin = out_span.begin() + pivot; + if (nth_begin >= p.non_null_like_range.begin() && + nth_begin < p.non_null_like_range.end()) { + std::ranges::nth_element( + p.non_null_like_range.begin(), nth_begin, p.non_null_like_range.end(), + [&arr](uint64_t left, uint64_t right) { + const auto lval = 
GetView::LogicalValue(arr.GetView(left)); + const auto rval = GetView::LogicalValue(arr.GetView(right)); + return lval < rval; + }); } return Status::OK(); } @@ -145,16 +147,17 @@ class ArrayCompareSorter { using GetView = GetViewType; public: - Result operator()(uint64_t* indices_begin, uint64_t* indices_end, - const Array& array, int64_t offset, - const ArraySortOptions& options, ExecContext*) { + Result operator()(std::span indices, + const Array& array, int64_t offset, + const ArraySortOptions& options, + ExecContext*) { const auto& values = checked_cast(array); - const auto p = PartitionNulls( - indices_begin, indices_end, values, offset, options.null_placement); + const auto p = PartitionNullsAndNans( + indices, values, offset, options.null_placement); if (options.order == SortOrder::Ascending) { std::stable_sort( - p.non_nulls_begin, p.non_nulls_end, + p.non_null_like_range.begin(), p.non_null_like_range.end(), [&values, &offset](uint64_t left, uint64_t right) { const auto lhs = GetView::LogicalValue(values.GetView(left - offset)); const auto rhs = GetView::LogicalValue(values.GetView(right - offset)); @@ -162,7 +165,7 @@ class ArrayCompareSorter { }); } else { std::stable_sort( - p.non_nulls_begin, p.non_nulls_end, + p.non_null_like_range.begin(), p.non_null_like_range.end(), [&values, &offset](uint64_t left, uint64_t right) { const auto lhs = GetView::LogicalValue(values.GetView(left - offset)); const auto rhs = GetView::LogicalValue(values.GetView(right - offset)); @@ -178,10 +181,10 @@ class ArrayCompareSorter { template <> class ArrayCompareSorter { public: - Result operator()(uint64_t* indices_begin, uint64_t* indices_end, - const Array& array, int64_t offset, - const ArraySortOptions& options, - ExecContext* ctx) { + Result operator()(std::span indices, + const Array& array, int64_t offset, + const ArraySortOptions& options, + ExecContext* ctx) { const auto& dict_array = checked_cast(array); const auto& dict_values = dict_array.dictionary(); const 
auto& dict_indices = dict_array.indices(); @@ -220,7 +223,7 @@ class ArrayCompareSorter { DCHECK_EQ(decoded_ranks->length(), dict_array.length()); ARROW_ASSIGN_OR_RAISE(auto rank_sorter, GetArraySorter(*decoded_ranks->type())); - return rank_sorter(indices_begin, indices_end, *decoded_ranks, offset, options, ctx); + return rank_sorter(indices, *decoded_ranks, offset, options, ctx); } private: @@ -264,12 +267,12 @@ class ArrayCompareSorter { template <> class ArrayCompareSorter { public: - Result operator()(uint64_t* indices_begin, uint64_t* indices_end, - const Array& array, int64_t offset, - const ArraySortOptions& options, - ExecContext* ctx) { + Result operator()(std::span indices, + const Array& array, int64_t offset, + const ArraySortOptions& options, + ExecContext* ctx) { const auto& struct_array = checked_cast(array); - return SortStructArray(ctx, indices_begin, indices_end, struct_array, options.order, + return SortStructArray(ctx, indices, struct_array, options.order, options.null_placement); } }; @@ -290,17 +293,17 @@ class ArrayCountSorter { value_range_ = static_cast(max - min) + 1; } - Result operator()(uint64_t* indices_begin, uint64_t* indices_end, - const Array& array, int64_t offset, - const ArraySortOptions& options, - ExecContext*) const { + Result operator()(std::span indices, + const Array& array, int64_t offset, + const ArraySortOptions& options, + ExecContext*) const { const auto& values = checked_cast(array); // 32bit counter performs much better than 64bit one if (values.length() < (1LL << 32)) { - return SortInternal(indices_begin, indices_end, values, offset, options); + return SortInternal(indices, values, offset, options); } else { - return SortInternal(indices_begin, indices_end, values, offset, options); + return SortInternal(indices, values, offset, options); } } @@ -309,14 +312,14 @@ class ArrayCountSorter { uint32_t value_range_{0}; template - NullPartitionResult SortInternal(uint64_t* indices_begin, uint64_t* indices_end, - const 
ArrayType& values, int64_t offset, - const ArraySortOptions& options) const { + PartitionResultByNullLikeness SortInternal(std::span indices, + const ArrayType& values, int64_t offset, + const ArraySortOptions& options) const { const uint32_t value_range = value_range_; // first and last slot reserved for prefix sum (depending on sort order) std::vector counts(2 + value_range); - NullPartitionResult p; + PartitionResultByNullLikeness p; if (options.order == SortOrder::Ascending) { // counts will be increasing, starting with 0 and ending with (length - null_count) @@ -325,13 +328,10 @@ class ArrayCountSorter { counts[i] += counts[i - 1]; } - if (options.null_placement == NullPlacement::AtStart) { - p = NullPartitionResult::NullsAtStart(indices_begin, indices_end, - indices_end - counts[value_range]); - } else { - p = NullPartitionResult::NullsAtEnd(indices_begin, indices_end, - indices_begin + counts[value_range]); - } + p = PartitionResultByNullLikeness::fromCounts(indices, counts[value_range], 0, + indices.size() - counts[value_range], + options.null_placement); + EmitIndices(p, values, offset, &counts[0]); } else { // counts will be decreasing, starting with (length - null_count) and ending with 0 @@ -340,13 +340,8 @@ class ArrayCountSorter { counts[i - 1] += counts[i]; } - if (options.null_placement == NullPlacement::AtStart) { - p = NullPartitionResult::NullsAtStart(indices_begin, indices_end, - indices_end - counts[0]); - } else { - p = NullPartitionResult::NullsAtEnd(indices_begin, indices_end, - indices_begin + counts[0]); - } + p = PartitionResultByNullLikeness::fromCounts( + indices, counts[0], 0, indices.size() - counts[0], options.null_placement); EmitIndices(p, values, offset, &counts[1]); } return p; @@ -359,14 +354,14 @@ class ArrayCountSorter { } template - void EmitIndices(const NullPartitionResult& p, const ArrayType& values, int64_t offset, - CounterType* counts) const { + void EmitIndices(const PartitionResultByNullLikeness& p, const ArrayType& 
values, + int64_t offset, CounterType* counts) const { int64_t index = offset; CounterType count_nulls = 0; VisitRawValuesInline( *values.data(), - [&](c_type v) { p.non_nulls_begin[counts[v - min_]++] = index++; }, - [&]() { p.nulls_begin[count_nulls++] = index++; }); + [&](c_type v) { p.non_null_like_range[counts[v - min_]++] = index++; }, + [&]() { p.null_range[count_nulls++] = index++; }); } }; @@ -375,25 +370,22 @@ class ArrayCountSorter { public: ArrayCountSorter() = default; - Result operator()(uint64_t* indices_begin, uint64_t* indices_end, - const Array& array, int64_t offset, - const ArraySortOptions& options, ExecContext*) { + Result operator()(std::span indices, + const Array& array, int64_t offset, + const ArraySortOptions& options, + ExecContext*) { const auto& values = checked_cast(array); std::array counts{0, 0, 0}; // false, true, null const int64_t nulls = values.null_count(); + const int64_t non_nulls = values.length() - nulls; + const int64_t ones = values.true_count(); - const int64_t zeros = values.length() - ones - nulls; + const int64_t zeros = non_nulls - ones; - NullPartitionResult p; - if (options.null_placement == NullPlacement::AtStart) { - p = NullPartitionResult::NullsAtStart(indices_begin, indices_end, - indices_begin + nulls); - } else { - p = NullPartitionResult::NullsAtEnd(indices_begin, indices_end, - indices_end - nulls); - } + PartitionResultByNullLikeness p = PartitionResultByNullLikeness::fromCounts( + indices, non_nulls, 0, nulls, options.null_placement); if (options.order == SortOrder::Ascending) { // ones start after zeros @@ -405,8 +397,8 @@ class ArrayCountSorter { int64_t index = offset; VisitRawValuesInline( - *values.data(), [&](bool v) { p.non_nulls_begin[counts[v]++] = index++; }, - [&]() { p.nulls_begin[counts[2]++] = index++; }); + *values.data(), [&](bool v) { p.non_null_like_range[counts[v]++] = index++; }, + [&]() { p.null_range[counts[2]++] = index++; }); return p; } }; @@ -420,10 +412,10 @@ class 
ArrayCountOrCompareSorter { using c_type = typename ArrowType::c_type; public: - Result operator()(uint64_t* indices_begin, uint64_t* indices_end, - const Array& array, int64_t offset, - const ArraySortOptions& options, - ExecContext* ctx) { + Result operator()(std::span indices, + const Array& array, int64_t offset, + const ArraySortOptions& options, + ExecContext* ctx) { const auto& values = checked_cast(array); if (values.length() >= countsort_min_len_ && values.length() > values.null_count()) { @@ -435,11 +427,11 @@ class ArrayCountOrCompareSorter { if (static_cast(max) - static_cast(min) <= countsort_max_range_) { count_sorter_.SetMinMax(min, max); - return count_sorter_(indices_begin, indices_end, values, offset, options, ctx); + return count_sorter_(indices, values, offset, options, ctx); } } - return compare_sorter_(indices_begin, indices_end, values, offset, options, ctx); + return compare_sorter_(indices, values, offset, options, ctx); } private: @@ -461,11 +453,12 @@ class ArrayCountOrCompareSorter { class ArrayNullSorter { public: - Result operator()(uint64_t* indices_begin, uint64_t* indices_end, - const Array& values, int64_t offset, - const ArraySortOptions& options, ExecContext*) { - return NullPartitionResult::NullsOnly(indices_begin, indices_end, - options.null_placement); + Result operator()(std::span indices, + const Array& values, int64_t offset, + const ArraySortOptions& options, + ExecContext*) { + return PartitionResultByNullLikeness::fromCounts(indices, 0, 0, indices.size(), + options.null_placement); } }; @@ -542,14 +535,14 @@ struct ArraySortIndices { static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { const auto& options = ArraySortIndicesState::Get(ctx); ArrayData* out_arr = out->array_data().get(); - uint64_t* out_begin = out_arr->GetMutableValues(1); - uint64_t* out_end = out_begin + out_arr->length; - std::iota(out_begin, out_end, 0); + std::span out_span{out_arr->GetMutableValues(1), + 
static_cast(out_arr->length)}; + std::iota(out_span.begin(), out_span.end(), 0); ArrayType arr(batch[0].array.ToArrayData()); ARROW_ASSIGN_OR_RAISE(auto sorter, GetArraySorter(*GetPhysicalType(arr.type()))); - return sorter(out_begin, out_end, arr, 0, options, ctx->exec_context()).status(); + return sorter(out_span, arr, 0, options, ctx->exec_context()).status(); } }; @@ -557,12 +550,11 @@ Status ArraySortIndicesChunked(KernelContext* ctx, const ExecBatch& batch, Datum const auto& options = ArraySortIndicesState::Get(ctx); ArrayData* out_arr = out->mutable_array(); DCHECK_EQ(out_arr->length, batch.length); - uint64_t* out_begin = out_arr->GetMutableValues(1); - uint64_t* out_end = out_begin + out_arr->length; - std::iota(out_begin, out_end, 0); - return SortChunkedArray(ctx->exec_context(), out_begin, out_end, - *batch[0].chunked_array(), options.order, - options.null_placement) + std::span out_span{out_arr->GetMutableValues(1), + static_cast(out_arr->length)}; + std::iota(out_span.begin(), out_span.end(), 0); + return SortChunkedArray(ctx->exec_context(), out_span, *batch[0].chunked_array(), + options.order, options.null_placement) .status(); } diff --git a/cpp/src/arrow/compute/kernels/vector_rank.cc b/cpp/src/arrow/compute/kernels/vector_rank.cc index d32dd359c76..b95314679ee 100644 --- a/cpp/src/arrow/compute/kernels/vector_rank.cc +++ b/cpp/src/arrow/compute/kernels/vector_rank.cc @@ -37,16 +37,16 @@ namespace { // is the same as the value at the previous sort index. 
constexpr uint64_t kDuplicateMask = 1ULL << 63; -template -void MarkDuplicates(const NullPartitionResult& sorted, ValueSelector&& value_selector, - IsNullSelector&& is_null_selector) { +template +void MarkDuplicates(const PartitionResultByNullLikeness& sorted, + ValueSelector&& value_selector) { using T = decltype(value_selector(int64_t{})); // Process non-nulls - if (sorted.non_nulls_end != sorted.non_nulls_begin) { - auto it = sorted.non_nulls_begin; + if (!sorted.non_null_like_range.empty()) { + auto it = sorted.non_null_like_range.begin(); T prev_value = value_selector(*it); - while (++it < sorted.non_nulls_end) { + while (++it < sorted.non_null_like_range.end()) { T curr_value = value_selector(*it); if (curr_value == prev_value) { *it |= kDuplicateMask; @@ -55,23 +55,24 @@ void MarkDuplicates(const NullPartitionResult& sorted, ValueSelector&& value_sel } } + // Process nans + if (!sorted.nan_range.empty()) { + for (auto& index : sorted.nan_range.subspan(1)) { + index |= kDuplicateMask; + } + } + // Process nulls - if (sorted.nulls_end != sorted.nulls_begin) { - auto it = sorted.nulls_begin; - bool prev_is_null = is_null_selector(*it); - while (++it < sorted.nulls_end) { - bool curr_is_null = is_null_selector(*it); - if (curr_is_null == prev_is_null) { - *it |= kDuplicateMask; - } - prev_is_null = curr_is_null; + if (!sorted.null_range.empty()) { + for (auto& index : sorted.null_range.subspan(1)) { + index |= kDuplicateMask; } } } template -Result DoSortAndMarkDuplicate( - ExecContext* ctx, uint64_t* indices_begin, uint64_t* indices_end, const Array& input, +Result DoSortAndMarkDuplicate( + ExecContext* ctx, std::span indices, const Array& input, const std::shared_ptr& physical_type, const SortOrder order, const NullPlacement null_placement, bool needs_duplicates) { using GetView = GetViewType; @@ -80,51 +81,38 @@ Result DoSortAndMarkDuplicate( ARROW_ASSIGN_OR_RAISE(auto array_sorter, GetArraySorter(*physical_type)); ArrayType array(input.data()); - 
ARROW_ASSIGN_OR_RAISE(auto sorted, - array_sorter(indices_begin, indices_end, array, 0, - ArraySortOptions(order, null_placement), ctx)); + ARROW_ASSIGN_OR_RAISE( + auto sorted, + array_sorter(indices, array, 0, ArraySortOptions(order, null_placement), ctx)); if (needs_duplicates) { auto value_selector = [&array](int64_t index) { return GetView::LogicalValue(array.GetView(index)); }; - if constexpr (has_null_like_values()) { - auto is_null_selector = [&array](int64_t index) { return array.IsNull(index); }; - MarkDuplicates(sorted, value_selector, is_null_selector); - } else { - MarkDuplicates(sorted, value_selector, [](int64_t) { return true; }); - } + MarkDuplicates(sorted, value_selector); } return sorted; } template -Result DoSortAndMarkDuplicate( - ExecContext* ctx, uint64_t* indices_begin, uint64_t* indices_end, - const ChunkedArray& input, const std::shared_ptr& physical_type, - const SortOrder order, const NullPlacement null_placement, bool needs_duplicates) { +Result DoSortAndMarkDuplicate( + ExecContext* ctx, std::span indices, const ChunkedArray& input, + const std::shared_ptr& physical_type, const SortOrder order, + const NullPlacement null_placement, bool needs_duplicates) { auto physical_chunks = GetPhysicalChunks(input, physical_type); if (physical_chunks.empty()) { - return NullPartitionResult{}; + return PartitionResultByNullLikeness::fromCounts(indices, 0, 0, 0, null_placement); } - ARROW_ASSIGN_OR_RAISE(auto sorted, - SortChunkedArray(ctx, indices_begin, indices_end, physical_type, - physical_chunks, order, null_placement)); + ARROW_ASSIGN_OR_RAISE( + auto sorted, SortChunkedArray(ctx, indices, physical_type, physical_chunks, order, + null_placement)); if (needs_duplicates) { const auto arrays = GetArrayPointers(physical_chunks); auto value_selector = [resolver = ChunkedArrayResolver(std::span(arrays))](int64_t index) { return resolver.Resolve(index).Value(); }; - if constexpr (has_null_like_values()) { - auto is_null_selector = - [resolver = 
ChunkedArrayResolver(std::span(arrays))](int64_t index) { - return resolver.Resolve(index).IsNull(); - }; - MarkDuplicates(sorted, value_selector, is_null_selector); - } else { - MarkDuplicates(sorted, value_selector, [](int64_t) { return true; }); - } + MarkDuplicates(sorted, value_selector); } return sorted; } @@ -132,31 +120,29 @@ Result DoSortAndMarkDuplicate( template class SortAndMarkDuplicate : public TypeVisitor { public: - SortAndMarkDuplicate(ExecContext* ctx, uint64_t* indices_begin, uint64_t* indices_end, + SortAndMarkDuplicate(ExecContext* ctx, std::span indices, const InputType& input, const SortOrder order, const NullPlacement null_placement, const bool needs_duplicate) : TypeVisitor(), ctx_(ctx), - indices_begin_(indices_begin), - indices_end_(indices_end), + indices_(indices), input_(input), order_(order), null_placement_(null_placement), needs_duplicates_(needs_duplicate), physical_type_(GetPhysicalType(input.type())) {} - Result Run() { + Result Run() { RETURN_NOT_OK(physical_type_->Accept(this)); return sorted_; } -#define VISIT(TYPE) \ - Status Visit(const TYPE& type) { \ - ARROW_ASSIGN_OR_RAISE( \ - sorted_, DoSortAndMarkDuplicate(ctx_, indices_begin_, indices_end_, \ - input_, physical_type_, order_, \ - null_placement_, needs_duplicates_)); \ - return Status::OK(); \ +#define VISIT(TYPE) \ + Status Visit(const TYPE& type) { \ + ARROW_ASSIGN_OR_RAISE(sorted_, DoSortAndMarkDuplicate( \ + ctx_, indices_, input_, physical_type_, order_, \ + null_placement_, needs_duplicates_)); \ + return Status::OK(); \ } VISIT_SORTABLE_PHYSICAL_TYPES(VISIT) @@ -165,20 +151,20 @@ class SortAndMarkDuplicate : public TypeVisitor { private: ExecContext* ctx_; - uint64_t* indices_begin_; - uint64_t* indices_end_; + std::span indices_; const InputType& input_; const SortOrder order_; const NullPlacement null_placement_; const bool needs_duplicates_; const std::shared_ptr physical_type_; - NullPartitionResult sorted_{}; + PartitionResultByNullLikeness sorted_{}; }; 
// A CRTP-based helper class for "rank_normal" and "rank_quantile" template struct BaseQuantileRanker { - Result CreateRankings(ExecContext* ctx, const NullPartitionResult& sorted) { + Result CreateRankings(ExecContext* ctx, + const PartitionResultByNullLikeness& sorted) { const int64_t length = sorted.overall_end() - sorted.overall_begin(); ARROW_ASSIGN_OR_RAISE(auto rankings, MakeMutableFloat64Array(length, ctx->memory_pool())); @@ -228,7 +214,8 @@ struct NormalRanker : public BaseQuantileRanker { struct OrdinalRanker { explicit OrdinalRanker(RankOptions::Tiebreaker tiebreaker) : tiebreaker_(tiebreaker) {} - Result CreateRankings(ExecContext* ctx, const NullPartitionResult& sorted) { + Result CreateRankings(ExecContext* ctx, + const PartitionResultByNullLikeness& sorted) { const int64_t length = sorted.overall_end() - sorted.overall_begin(); ARROW_ASSIGN_OR_RAISE(auto rankings, MakeMutableUInt64Array(length, ctx->memory_pool())); @@ -379,13 +366,13 @@ class RankMetaFunctionBase : public MetaFunction { int64_t length = input.length(); ARROW_ASSIGN_OR_RAISE(auto indices, MakeMutableUInt64Array(length, ctx->memory_pool())); - auto* indices_begin = indices->GetMutableValues(1); - auto* indices_end = indices_begin + length; - std::iota(indices_begin, indices_end, 0); + std::span indices_span{indices->GetMutableValues(1), + static_cast(length)}; + std::iota(indices_span.begin(), indices_span.end(), 0); auto needs_duplicates = Derived::NeedsDuplicates(options); ARROW_ASSIGN_OR_RAISE( - auto sorted, SortAndMarkDuplicate(ctx, indices_begin, indices_end, input, order, - null_placement, needs_duplicates) + auto sorted, SortAndMarkDuplicate(ctx, indices_span, input, order, null_placement, + needs_duplicates) .Run()); auto ranker = Derived::GetRanker(options); diff --git a/cpp/src/arrow/compute/kernels/vector_select_k.cc b/cpp/src/arrow/compute/kernels/vector_select_k.cc index cc375919658..ed22625fa8e 100644 --- a/cpp/src/arrow/compute/kernels/vector_select_k.cc +++ 
b/cpp/src/arrow/compute/kernels/vector_select_k.cc @@ -166,29 +166,6 @@ void HeapSortNonNullsToOutput(std::span non_null_input_range, } } -struct PartitionResultByNullLikeness { - std::span non_null_like_range; - std::span null_range; - std::span nan_range; -}; - -template -PartitionResultByNullLikeness PartitionNullsAndNans(uint64_t* indices_begin, - uint64_t* indices_end, - const ArrayType& values, - int64_t offset, - NullPlacement null_placement) { - // Partition nulls at start (resp. end), and null-like values just before (resp. after) - NullPartitionResult p = PartitionNullsOnly(indices_begin, indices_end, - values, offset, null_placement); - NullPartitionResult q = PartitionNullLikes( - p.non_nulls_begin, p.non_nulls_end, values, offset, null_placement); - return PartitionResultByNullLikeness{ - .non_null_like_range = {q.non_nulls_begin, q.non_nulls_end}, - .null_range = {p.nulls_begin, p.nulls_end}, - .nan_range = {q.nulls_begin, q.nulls_end}}; -} - class ArraySelector : public TypeVisitor { public: ArraySelector(ExecContext* ctx, const Array& array, const SelectKOptions& options, @@ -228,16 +205,14 @@ class ArraySelector : public TypeVisitor { std::vector indices(arr.length()); - uint64_t* indices_begin = indices.data(); - uint64_t* indices_end = indices_begin + indices.size(); - std::iota(indices_begin, indices_end, 0); + std::iota(indices.begin(), indices.end(), 0); ARROW_ASSIGN_OR_RAISE(auto take_indices, MakeMutableUInt64Array(k_, ctx_->memory_pool())); auto* output_begin = take_indices->template GetMutableValues(1); const auto p = PartitionNullsAndNans( - indices_begin, indices_end, arr, 0, null_placement_); + indices, arr, 0, null_placement_); // From k, calculate // l = non_null_like elements to take from PartitionResult @@ -340,13 +315,11 @@ class ChunkedArraySelector : public TypeVisitor { auto& indices = indices_by_chunk.emplace_back(); indices.resize(arr.length()); - uint64_t* indices_begin = indices.data(); - uint64_t* indices_end = 
indices_begin + indices.size(); - std::iota(indices_begin, indices_end, 0); + std::iota(indices.begin(), indices.end(), 0); partitions_by_chunk.emplace_back( - PartitionNullsAndNans( - indices_begin, indices_end, arr, 0, null_placement_)); + PartitionNullsAndNans(indices, arr, 0, + null_placement_)); null_count += partitions_by_chunk.back().null_range.size(); nan_count += partitions_by_chunk.back().nan_range.size(); @@ -495,12 +468,8 @@ class RecordBatchSelector { const auto& first_remaining_sort_key = selector_->sort_keys_[start_sort_key_index_]; const auto& arr = checked_cast(first_remaining_sort_key.array); - uint64_t* input_indices_begin = input_indices_.data(); - uint64_t* input_indices_end = input_indices_.data() + input_indices_.size(); - const auto p = PartitionNullsAndNans( - input_indices_begin, input_indices_end, arr, 0, - first_remaining_sort_key.null_placement); + input_indices_, arr, 0, first_remaining_sort_key.null_placement); // From k = output_indices_.size(), calculate // l = non_null_like elements to take from PartitionResult diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc index 06eed160233..e7190ff84b4 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_sort.cc @@ -45,13 +45,13 @@ Result BatchesFromTable(const Table& table) { // then merging the sorted chunks recursively. 
class ChunkedArraySorter : public TypeVisitor { public: - ChunkedArraySorter(ExecContext* ctx, uint64_t* indices_begin, uint64_t* indices_end, + ChunkedArraySorter(ExecContext* ctx, std::span indices, const std::shared_ptr& physical_type, const ArrayVector& physical_chunks, const SortOrder order, - const NullPlacement null_placement, NullPartitionResult* output) + const NullPlacement null_placement, + PartitionResultByNullLikeness* output) : TypeVisitor(), - indices_begin_(indices_begin), - indices_end_(indices_end), + indices_(indices), physical_type_(physical_type), physical_chunks_(physical_chunks), order_(order), @@ -72,7 +72,7 @@ class ChunkedArraySorter : public TypeVisitor { #undef VISIT Status Visit(const NullType&) override { - std::iota(indices_begin_, indices_end_, 0); + std::iota(indices_.begin(), indices_.end(), 0); return Status::OK(); } @@ -83,61 +83,53 @@ class ChunkedArraySorter : public TypeVisitor { ArraySortOptions options(order_, null_placement_); const auto num_chunks = static_cast(physical_chunks_.size()); if (num_chunks == 0) { - *output_ = {indices_end_, indices_end_, indices_end_, indices_end_}; + DCHECK_EQ(static_cast(indices_.size()), 0); + *output_ = + PartitionResultByNullLikeness::fromCounts(indices_, 0, 0, 0, null_placement_); return Status::OK(); } - const int64_t num_indices = static_cast(indices_end_ - indices_begin_); + const int64_t num_indices = static_cast(indices_.size()); const auto arrays = GetArrayPointers(physical_chunks_); // Sort each chunk independently and merge to sorted indices. // This is a serial implementation. 
- std::vector sorted(num_chunks); + std::vector sorted(num_chunks); // First sort all individual chunks int64_t begin_offset = 0; - int64_t end_offset = 0; int64_t null_count = 0; for (int i = 0; i < num_chunks; ++i) { const auto array = checked_cast(arrays[i]); - end_offset += array->length(); + const auto array_length = array->length(); null_count += array->null_count(); - ARROW_ASSIGN_OR_RAISE(sorted[i], array_sorter_(indices_begin_ + begin_offset, - indices_begin_ + end_offset, *array, - begin_offset, options, ctx_)); - begin_offset = end_offset; + ARROW_ASSIGN_OR_RAISE( + sorted[i], array_sorter_(indices_.subspan(begin_offset, array_length), *array, + begin_offset, options, ctx_)); + begin_offset += array_length; } - DCHECK_EQ(end_offset, num_indices); + DCHECK_EQ(begin_offset, num_indices); // Then merge them by pairs, recursively if (sorted.size() > 1) { - ChunkedIndexMapper chunked_mapper(arrays, indices_begin_, indices_end_); + ChunkedIndexMapper chunked_mapper(arrays, indices_); ARROW_ASSIGN_OR_RAISE(auto chunked_indices_pair, chunked_mapper.LogicalToPhysical()); auto [chunked_indices_begin, chunked_indices_end] = chunked_indices_pair; - std::vector chunk_sorted(num_chunks); + std::vector chunk_sorted(num_chunks); for (int i = 0; i < num_chunks; ++i) { - chunk_sorted[i] = sorted[i].TranslateTo(indices_begin_, chunked_indices_begin); + chunk_sorted[i] = sorted[i].TranslateTo(indices_.data(), chunked_indices_begin); } - auto merge_nulls = [&](CompressedChunkLocation* nulls_begin, - CompressedChunkLocation* nulls_middle, - CompressedChunkLocation* nulls_end, - CompressedChunkLocation* temp_indices, int64_t null_count) { - if (has_null_like_values()) { - PartitionNullsOnly(nulls_begin, nulls_end, arrays, - null_count, null_placement_); - } - }; auto merge_non_nulls = [&](CompressedChunkLocation* range_begin, CompressedChunkLocation* range_middle, CompressedChunkLocation* range_end, CompressedChunkLocation* temp_indices) { - MergeNonNulls(range_begin, 
range_middle, range_end, arrays, - temp_indices); + MergeNonNulls( + {range_begin, range_middle}, {range_middle, range_end}, arrays, + {temp_indices, static_cast(range_end - range_begin)}); }; - ChunkedMergeImpl merge_impl{null_placement_, std::move(merge_nulls), - std::move(merge_non_nulls)}; + ChunkedMergeImpl merge_impl{null_placement_, std::move(merge_non_nulls)}; // std::merge is only called on non-null values, so size temp indices accordingly RETURN_NOT_OK(merge_impl.Init(ctx_, num_indices - null_count)); @@ -160,47 +152,49 @@ class ChunkedArraySorter : public TypeVisitor { // Reverse everything sorted.resize(1); - sorted[0] = chunk_sorted[0].TranslateTo(chunked_indices_begin, indices_begin_); + sorted[0] = chunk_sorted[0].TranslateTo(chunked_indices_begin, indices_.data()); RETURN_NOT_OK(chunked_mapper.PhysicalToLogical()); } DCHECK_EQ(sorted.size(), 1); - DCHECK_EQ(sorted[0].overall_begin(), indices_begin_); - DCHECK_EQ(sorted[0].overall_end(), indices_end_); + DCHECK_EQ(sorted[0].overall_begin(), indices_.data()); + DCHECK_EQ(sorted[0].overall_end(), indices_.data() + indices_.size()); // Note that "nulls" can also include NaNs, hence the >= check - DCHECK_GE(sorted[0].null_count(), null_count); + DCHECK_GE(static_cast(sorted[0].null_range.size()), null_count); *output_ = sorted[0]; return Status::OK(); } template - void MergeNonNulls(CompressedChunkLocation* range_begin, - CompressedChunkLocation* range_middle, - CompressedChunkLocation* range_end, + void MergeNonNulls(std::span left, + std::span right, std::span arrays, - CompressedChunkLocation* temp_indices) { + std::span temp_indices) { using ArrowType = typename ArrayType::TypeClass; if (order_ == SortOrder::Ascending) { - std::merge(range_begin, range_middle, range_middle, range_end, temp_indices, - [&](CompressedChunkLocation left, CompressedChunkLocation right) { - return ChunkValue(arrays, left) < - ChunkValue(arrays, right); - }); + std::ranges::merge( + left, right, temp_indices.begin(), + 
[&](CompressedChunkLocation left, CompressedChunkLocation right) { + return ChunkValue(arrays, left) < + ChunkValue(arrays, right); + }); } else { - std::merge(range_begin, range_middle, range_middle, range_end, temp_indices, - [&](CompressedChunkLocation left, CompressedChunkLocation right) { - // We don't use 'left > right' here to reduce required - // operator. If we use 'right < left' here, '<' is only - // required. - return ChunkValue(arrays, right) < - ChunkValue(arrays, left); - }); + std::ranges::merge( + left, right, temp_indices.begin(), + [&](CompressedChunkLocation left, CompressedChunkLocation right) { + // We don't use 'left > right' here to reduce required + // operator. If we use 'right < left' here, '<' is only + // required. + return ChunkValue(arrays, right) < + ChunkValue(arrays, left); + }); } // Copy back temp area into main buffer - std::copy(temp_indices, temp_indices + (range_end - range_begin), range_begin); + std::ranges::copy(temp_indices.begin(), + temp_indices.begin() + left.size() + right.size(), left.begin()); } template @@ -211,15 +205,14 @@ class ChunkedArraySorter : public TypeVisitor { .template Value(); } - uint64_t* indices_begin_; - uint64_t* indices_end_; + std::span indices_; const std::shared_ptr& physical_type_; const ArrayVector& physical_chunks_; const SortOrder order_; const NullPlacement null_placement_; ArraySortFunc array_sorter_; ExecContext* ctx_; - NullPartitionResult* output_; + PartitionResultByNullLikeness* output_; }; // ---------------------------------------------------------------------- @@ -228,26 +221,26 @@ class ChunkedArraySorter : public TypeVisitor { // Visit contiguous ranges of equal values. All entries are assumed // to be non-null. 
template -void VisitConstantRanges(const ArrayType& array, uint64_t* indices_begin, - uint64_t* indices_end, int64_t offset, Visitor&& visit) { +void VisitConstantRanges(const ArrayType& array, std::span indices, + int64_t offset, Visitor&& visit) { using GetView = GetViewType; - if (indices_begin == indices_end) { + if (indices.empty()) { return; } - auto range_start = indices_begin; + auto range_start = indices.begin(); auto range_cur = range_start; auto last_value = GetView::LogicalValue(array.GetView(*range_cur - offset)); - while (++range_cur != indices_end) { + while (++range_cur != indices.end()) { auto v = GetView::LogicalValue(array.GetView(*range_cur - offset)); if (v != last_value) { - visit(range_start, range_cur); + visit(std::span{range_start, range_cur}); range_start = range_cur; last_value = v; } } - if (range_start != range_cur) { - visit(range_start, range_cur); + if (range_start != indices.end()) { + visit({range_start, indices.end()}); } } @@ -259,8 +252,8 @@ class RecordBatchColumnSorter { : next_column_(next_column) {} virtual ~RecordBatchColumnSorter() {} - virtual NullPartitionResult SortRange(uint64_t* indices_begin, uint64_t* indices_end, - int64_t offset) = 0; + virtual PartitionResultByNullLikeness SortRange(std::span indices, + int64_t offset) = 0; protected: RecordBatchColumnSorter* next_column_; @@ -281,36 +274,36 @@ class ConcreteRecordBatchColumnSorter : public RecordBatchColumnSorter { null_placement_(null_placement), null_count_(array_.null_count()) {} - NullPartitionResult SortRange(uint64_t* indices_begin, uint64_t* indices_end, - int64_t offset) override { + PartitionResultByNullLikeness SortRange(std::span indices, + int64_t offset) override { using GetView = GetViewType; - NullPartitionResult p; + PartitionResultByNullLikeness partitions; if (null_count_ == 0) { - p = NullPartitionResult::NoNulls(indices_begin, indices_end, null_placement_); + partitions = PartitionNansOnly( + indices, array_, offset, null_placement_); + } 
else { // NOTE that null_count_ is merely an upper bound on the number of nulls // in this particular range. - p = PartitionNullsOnly(indices_begin, indices_end, array_, - offset, null_placement_); - DCHECK_LE(p.nulls_end - p.nulls_begin, null_count_); + partitions = PartitionNullsAndNans( + indices, array_, offset, null_placement_); + DCHECK_LE(static_cast(partitions.null_range.size()), null_count_); } - const NullPartitionResult q = PartitionNullLikes( - p.non_nulls_begin, p.non_nulls_end, array_, offset, null_placement_); // TODO This is roughly the same as ArrayCompareSorter. // Also, we would like to use a counting sort if possible. This requires // a counting sort compatible with indirect indexing. if (order_ == SortOrder::Ascending) { - std::stable_sort( - q.non_nulls_begin, q.non_nulls_end, [&](uint64_t left, uint64_t right) { + std::ranges::stable_sort( + partitions.non_null_like_range, [&](uint64_t left, uint64_t right) { const auto lhs = GetView::LogicalValue(array_.GetView(left - offset)); const auto rhs = GetView::LogicalValue(array_.GetView(right - offset)); return lhs < rhs; }); } else { - std::stable_sort( - q.non_nulls_begin, q.non_nulls_end, [&](uint64_t left, uint64_t right) { + std::ranges::stable_sort( + partitions.non_null_like_range, [&](uint64_t left, uint64_t right) { // We don't use 'left > right' here to reduce required operator. // If we use 'right < left' here, '<' is only required. const auto lhs = GetView::LogicalValue(array_.GetView(left - offset)); @@ -322,22 +315,19 @@ class ConcreteRecordBatchColumnSorter : public RecordBatchColumnSorter { if (next_column_ != nullptr) { // Visit all ranges of equal values in this column and sort them on // the next column. 
- SortNextColumn(q.nulls_begin, q.nulls_end, offset); - SortNextColumn(p.nulls_begin, p.nulls_end, offset); - VisitConstantRanges(array_, q.non_nulls_begin, q.non_nulls_end, offset, - [&](uint64_t* range_start, uint64_t* range_end) { - SortNextColumn(range_start, range_end, offset); - }); + SortNextColumn(partitions.null_range, offset); + SortNextColumn(partitions.nan_range, offset); + VisitConstantRanges( + array_, partitions.non_null_like_range, offset, + [&](std::span indices) { SortNextColumn(indices, offset); }); } - return NullPartitionResult{q.non_nulls_begin, q.non_nulls_end, - std::min(q.nulls_begin, p.nulls_begin), - std::max(q.nulls_end, p.nulls_end)}; + return partitions; } - void SortNextColumn(uint64_t* indices_begin, uint64_t* indices_end, int64_t offset) { + void SortNextColumn(std::span indices, int64_t offset) { // Avoid the cost of a virtual method call in trivial cases - if (indices_end - indices_begin > 1) { - next_column_->SortRange(indices_begin, indices_end, offset); + if (indices.size() > 1) { + next_column_->SortRange(indices, offset); } } @@ -357,12 +347,12 @@ class ConcreteRecordBatchColumnSorter : public RecordBatchColumnSorter RecordBatchColumnSorter* next_column = nullptr) : RecordBatchColumnSorter(next_column), null_placement_(null_placement) {} - NullPartitionResult SortRange(uint64_t* indices_begin, uint64_t* indices_end, - int64_t offset) { + PartitionResultByNullLikeness SortRange(std::span indices, int64_t offset) { if (next_column_ != nullptr) { - next_column_->SortRange(indices_begin, indices_end, offset); + next_column_->SortRange(indices, offset); } - return NullPartitionResult::NullsOnly(indices_begin, indices_end, null_placement_); + return PartitionResultByNullLikeness::fromCounts(indices, 0, 0, indices.size(), + null_placement_); } protected: @@ -395,20 +385,17 @@ class RadixRecordBatchSorter { public: using ResolvedSortKey = ResolvedRecordBatchSortKey; - RadixRecordBatchSorter(uint64_t* indices_begin, uint64_t* 
indices_end, + RadixRecordBatchSorter(std::span indices, std::vector sort_keys) - : sort_keys_(std::move(sort_keys)), - indices_begin_(indices_begin), - indices_end_(indices_end) {} + : sort_keys_(std::move(sort_keys)), indices_(indices) {} - RadixRecordBatchSorter(uint64_t* indices_begin, uint64_t* indices_end, - const RecordBatch& batch, const SortOptions& options) + RadixRecordBatchSorter(std::span indices, const RecordBatch& batch, + const SortOptions& options) : sort_keys_(ResolveRecordBatchSortKeys(batch, options.GetSortKeys(), &status_)), - indices_begin_(indices_begin), - indices_end_(indices_end) {} + indices_(indices) {} // Offset is for table sorting - Result Sort(int64_t offset = 0) { + Result Sort(int64_t offset = 0) { ARROW_RETURN_NOT_OK(status_); // Create column sorters from right to left @@ -421,7 +408,7 @@ class RadixRecordBatchSorter { } // Sort from left to right - return column_sorts.front()->SortRange(indices_begin_, indices_end_, offset); + return column_sorts.front()->SortRange(indices_, offset); } protected: @@ -474,8 +461,7 @@ class RadixRecordBatchSorter { } const std::vector sort_keys_; - uint64_t* indices_begin_; - uint64_t* indices_end_; + std::span indices_; Status status_; }; @@ -484,17 +470,13 @@ class MultipleKeyRecordBatchSorter : public TypeVisitor { public: using ResolvedSortKey = ResolvedRecordBatchSortKey; - MultipleKeyRecordBatchSorter(uint64_t* indices_begin, uint64_t* indices_end, + MultipleKeyRecordBatchSorter(std::span indices, std::vector sort_keys) - : indices_begin_(indices_begin), - indices_end_(indices_end), - sort_keys_(std::move(sort_keys)), - comparator_(sort_keys_) {} + : indices_(indices), sort_keys_(std::move(sort_keys)), comparator_(sort_keys_) {} - MultipleKeyRecordBatchSorter(uint64_t* indices_begin, uint64_t* indices_end, - const RecordBatch& batch, const SortOptions& options) - : indices_begin_(indices_begin), - indices_end_(indices_end), + MultipleKeyRecordBatchSorter(std::span indices, const RecordBatch& 
batch, + const SortOptions& options) + : indices_(indices), sort_keys_(ResolveSortKeys(batch, options.GetSortKeys(), &status_)), comparator_(sort_keys_) {} @@ -540,31 +522,30 @@ class MultipleKeyRecordBatchSorter : public TypeVisitor { const auto p = PartitionNullsInternal(first_sort_key); // Sort first-key non-nulls - std::stable_sort( - p.non_nulls_begin, p.non_nulls_end, [&](uint64_t left, uint64_t right) { - // Both values are never null nor NaN - // (otherwise they've been partitioned away above). - const auto value_left = GetView::LogicalValue(array.GetView(left)); - const auto value_right = GetView::LogicalValue(array.GetView(right)); - if (value_left != value_right) { - bool compared = value_left < value_right; - if (first_sort_key.order == SortOrder::Ascending) { - return compared; - } else { - return !compared; - } - } - // If the left value equals to the right value, - // we need to compare the second and following - // sort keys. - return comparator.Compare(left, right, 1); - }); + std::ranges::stable_sort(p.non_null_like_range, [&](uint64_t left, uint64_t right) { + // Both values are never null nor NaN + // (otherwise they've been partitioned away above). + const auto value_left = GetView::LogicalValue(array.GetView(left)); + const auto value_right = GetView::LogicalValue(array.GetView(right)); + if (value_left != value_right) { + bool compared = value_left < value_right; + if (first_sort_key.order == SortOrder::Ascending) { + return compared; + } else { + return !compared; + } + } + // If the left value equals to the right value, + // we need to compare the second and following + // sort keys. 
+ return comparator.Compare(left, right, 1); + }); return comparator_.status(); } template enable_if_null SortInternal() { - std::stable_sort(indices_begin_, indices_end_, [&](uint64_t left, uint64_t right) { + std::ranges::stable_sort(indices_, [&](uint64_t left, uint64_t right) { return comparator_.Compare(left, right, 1); }); return comparator_.status(); @@ -572,40 +553,37 @@ class MultipleKeyRecordBatchSorter : public TypeVisitor { // Behaves like PartitionNulls() but this supports multiple sort keys. template - NullPartitionResult PartitionNullsInternal(const ResolvedSortKey& first_sort_key) { + PartitionResultByNullLikeness PartitionNullsInternal( + const ResolvedSortKey& first_sort_key) { using ArrayType = typename TypeTraits::ArrayType; const ArrayType& array = ::arrow::internal::checked_cast(first_sort_key.array); - const auto p = PartitionNullsOnly( - indices_begin_, indices_end_, array, 0, first_sort_key.null_placement); - const auto q = PartitionNullLikes( - p.non_nulls_begin, p.non_nulls_end, array, 0, first_sort_key.null_placement); + const auto p = PartitionNullsAndNans( + indices_, array, 0, first_sort_key.null_placement); auto& comparator = comparator_; - if (q.nulls_begin != q.nulls_end) { + if (!p.nan_range.empty()) { // Sort all NaNs by the second and following sort keys. // TODO: could we instead run an independent sort from the second key on // this slice? - std::stable_sort(q.nulls_begin, q.nulls_end, - [&comparator](uint64_t left, uint64_t right) { - return comparator.Compare(left, right, 1); - }); + std::ranges::stable_sort(p.nan_range, [&comparator](uint64_t left, uint64_t right) { + return comparator.Compare(left, right, 1); + }); } - if (p.nulls_begin != p.nulls_end) { + if (!p.null_range.empty()) { // Sort all nulls by the second and following sort keys. // TODO: could we instead run an independent sort from the second key on // this slice? 
- std::stable_sort(p.nulls_begin, p.nulls_end, - [&comparator](uint64_t left, uint64_t right) { - return comparator.Compare(left, right, 1); - }); + std::ranges::stable_sort(p.null_range, + [&comparator](uint64_t left, uint64_t right) { + return comparator.Compare(left, right, 1); + }); } - return q; + return p; } - uint64_t* indices_begin_; - uint64_t* indices_end_; + std::span indices_; Status status_; std::vector sort_keys_; Comparator comparator_; @@ -625,15 +603,14 @@ class TableSorter { using Comparator = MultipleKeyComparator; public: - TableSorter(ExecContext* ctx, uint64_t* indices_begin, uint64_t* indices_end, - const Table& table, const SortOptions& options) + TableSorter(ExecContext* ctx, std::span indices, const Table& table, + const SortOptions& options) : ctx_(ctx), table_(table), batches_(MakeBatches(table, &status_)), options_(options), sort_keys_(ResolveSortKeys(table, batches_, options.GetSortKeys(), &status_)), - indices_begin_(indices_begin), - indices_end_(indices_end), + indices_(indices), comparator_(sort_keys_) {} // This is optimized for null partitioning and merging along the first sort key. 
@@ -670,7 +647,7 @@ class TableSorter { if (num_batches == 0) { return Status::OK(); } - std::vector sorted(num_batches); + std::vector sorted(num_batches); // First sort all individual batches int64_t begin_offset = 0; @@ -679,33 +656,36 @@ class TableSorter { for (int64_t i = 0; i < num_batches; ++i) { const auto& batch = *batches_[i]; end_offset += batch.num_rows(); - RadixRecordBatchSorter sorter(indices_begin_ + begin_offset, - indices_begin_ + end_offset, batch, options_); + RadixRecordBatchSorter sorter( + {indices_.data() + begin_offset, indices_.data() + end_offset}, batch, + options_); ARROW_ASSIGN_OR_RAISE(sorted[i], sorter.Sort(begin_offset)); - DCHECK_EQ(sorted[i].overall_begin(), indices_begin_ + begin_offset); - DCHECK_EQ(sorted[i].overall_end(), indices_begin_ + end_offset); - DCHECK_EQ(sorted[i].non_null_count() + sorted[i].null_count(), batch.num_rows()); + DCHECK_EQ(sorted[i].overall_begin(), indices_.data() + begin_offset); + DCHECK_EQ(sorted[i].overall_end(), indices_.data() + end_offset); + DCHECK_EQ(static_cast(sorted[i].non_null_like_range.size() + + sorted[i].null_range.size()), + batch.num_rows()); begin_offset = end_offset; // XXX this is an upper bound on the true null count - null_count += sorted[i].null_count(); + null_count += sorted[i].null_range.size(); } - DCHECK_EQ(end_offset, indices_end_ - indices_begin_); + DCHECK_EQ(end_offset, static_cast(indices_.size())); // Then merge them by pairs, recursively if (sorted.size() > 1) { - ChunkedIndexMapper chunked_mapper(batches_, indices_begin_, indices_end_); + ChunkedIndexMapper chunked_mapper(batches_, indices_); ARROW_ASSIGN_OR_RAISE(auto chunked_indices_pair, chunked_mapper.LogicalToPhysical()); auto [chunked_indices_begin, chunked_indices_end] = chunked_indices_pair; - std::vector chunk_sorted(num_batches); + std::vector chunk_sorted(num_batches); for (int64_t i = 0; i < num_batches; ++i) { - chunk_sorted[i] = sorted[i].TranslateTo(indices_begin_, chunked_indices_begin); + 
chunk_sorted[i] = sorted[i].TranslateTo(indices_.data(), chunked_indices_begin); } struct Visitor { TableSorter* sorter; - std::vector* chunk_sorted; + std::vector* chunk_sorted; int64_t null_count; #define VISIT(TYPE) \ @@ -736,23 +716,15 @@ class TableSorter { // Recursive merge routine, typed on the first sort key template - Status MergeInternal(std::vector* sorted, + Status MergeInternal(std::vector* sorted, int64_t null_count) { - auto merge_nulls = [&](CompressedChunkLocation* nulls_begin, - CompressedChunkLocation* nulls_middle, - CompressedChunkLocation* nulls_end, - CompressedChunkLocation* temp_indices, int64_t null_count) { - MergeNulls(nulls_begin, nulls_middle, nulls_end, temp_indices, - null_count); - }; auto merge_non_nulls = [&](CompressedChunkLocation* range_begin, CompressedChunkLocation* range_middle, CompressedChunkLocation* range_end, CompressedChunkLocation* temp_indices) { MergeNonNulls(range_begin, range_middle, range_end, temp_indices); }; - ChunkedMergeImpl merge_impl(sort_keys_[0].null_placement, std::move(merge_nulls), - std::move(merge_non_nulls)); + ChunkedMergeImpl merge_impl(sort_keys_[0].null_placement, std::move(merge_non_nulls)); RETURN_NOT_OK(merge_impl.Init(ctx_, table_.num_rows())); while (sorted->size() > 1) { @@ -877,8 +849,7 @@ class TableSorter { const RecordBatchVector batches_; const SortOptions& options_; const std::vector sort_keys_; - uint64_t* indices_begin_; - uint64_t* indices_end_; + std::span indices_; Comparator comparator_; }; @@ -999,12 +970,11 @@ class SortIndicesMetaFunction : public MetaFunction { ARROW_ASSIGN_OR_RAISE(buffers[1], AllocateResizableBuffer(buffer_size, ctx->memory_pool())); auto out = std::make_shared(out_type, length, buffers, 0); - auto out_begin = out->GetMutableValues(1); - auto out_end = out_begin + length; - std::iota(out_begin, out_end, 0); + std::span out_span{out->GetMutableValues(1), + static_cast(length)}; + std::iota(out_span.begin(), out_span.end(), 0); - RETURN_NOT_OK( - 
SortChunkedArray(ctx, out_begin, out_end, chunked_array, order, null_placement)); + RETURN_NOT_OK(SortChunkedArray(ctx, out_span, chunked_array, order, null_placement)); return Datum(out); } @@ -1029,15 +999,15 @@ class SortIndicesMetaFunction : public MetaFunction { ARROW_ASSIGN_OR_RAISE(buffers[1], AllocateResizableBuffer(buffer_size, ctx->memory_pool())); auto out = std::make_shared(out_type, length, buffers, 0); - auto out_begin = out->GetMutableValues(1); - auto out_end = out_begin + length; - std::iota(out_begin, out_end, 0); + std::span out_span{out->GetMutableValues(1), + static_cast(length)}; + std::iota(out_span.begin(), out_span.end(), 0); if (n_sort_keys <= kMaxRadixSortKeys) { - RadixRecordBatchSorter sorter(out_begin, out_end, std::move(sort_keys)); + RadixRecordBatchSorter sorter(out_span, std::move(sort_keys)); ARROW_RETURN_NOT_OK(sorter.Sort()); } else { - MultipleKeyRecordBatchSorter sorter(out_begin, out_end, std::move(sort_keys)); + MultipleKeyRecordBatchSorter sorter(out_span, std::move(sort_keys)); ARROW_RETURN_NOT_OK(sorter.Sort()); } return Datum(out); @@ -1069,11 +1039,11 @@ class SortIndicesMetaFunction : public MetaFunction { ARROW_ASSIGN_OR_RAISE(buffers[1], AllocateResizableBuffer(buffer_size, ctx->memory_pool())); auto out = std::make_shared(out_type, length, buffers, 0); - auto out_begin = out->GetMutableValues(1); - auto out_end = out_begin + length; - std::iota(out_begin, out_end, 0); + std::span out_span{out->GetMutableValues(1), + static_cast(length)}; + std::iota(out_span.begin(), out_span.end(), 0); - TableSorter sorter(ctx, out_begin, out_end, table, options); + TableSorter sorter(ctx, out_span, table, options); RETURN_NOT_OK(sorter.Sort()); return Datum(out); @@ -1145,33 +1115,33 @@ Result> FindSortKeys(const Schema& schema, return SortFieldPopulator{}.FindSortKeys(schema, sort_keys); } -Result SortChunkedArray(ExecContext* ctx, uint64_t* indices_begin, - uint64_t* indices_end, - const ChunkedArray& chunked_array, - SortOrder 
sort_order, - NullPlacement null_placement) { +Result SortChunkedArray(ExecContext* ctx, + std::span indices, + const ChunkedArray& chunked_array, + SortOrder sort_order, + NullPlacement null_placement) { auto physical_type = GetPhysicalType(chunked_array.type()); auto physical_chunks = GetPhysicalChunks(chunked_array, physical_type); - return SortChunkedArray(ctx, indices_begin, indices_end, physical_type, physical_chunks, - sort_order, null_placement); + return SortChunkedArray(ctx, indices, physical_type, physical_chunks, sort_order, + null_placement); } -Result SortChunkedArray( - ExecContext* ctx, uint64_t* indices_begin, uint64_t* indices_end, +Result SortChunkedArray( + ExecContext* ctx, std::span indices, const std::shared_ptr& physical_type, const ArrayVector& physical_chunks, SortOrder sort_order, NullPlacement null_placement) { - NullPartitionResult output; - ChunkedArraySorter sorter(ctx, indices_begin, indices_end, physical_type, - physical_chunks, sort_order, null_placement, &output); + PartitionResultByNullLikeness output; + ChunkedArraySorter sorter(ctx, indices, physical_type, physical_chunks, sort_order, + null_placement, &output); RETURN_NOT_OK(sorter.Sort()); return output; } -Result SortStructArray(ExecContext* ctx, uint64_t* indices_begin, - uint64_t* indices_end, - const StructArray& array, - SortOrder sort_order, - NullPlacement null_placement) { +Result SortStructArray(ExecContext* ctx, + std::span indices, + const StructArray& array, + SortOrder sort_order, + NullPlacement null_placement) { ARROW_ASSIGN_OR_RAISE(auto columns, array.Flatten()); auto batch = RecordBatch::Make(schema(array.type()->fields()), array.length(), std::move(columns)); @@ -1185,10 +1155,10 @@ Result SortStructArray(ExecContext* ctx, uint64_t* indices_ ARROW_ASSIGN_OR_RAISE(auto sort_keys, ResolveRecordBatchSortKeys(*batch, options.GetSortKeys())); if (sort_keys.size() <= kMaxRadixSortKeys) { - RadixRecordBatchSorter sorter(indices_begin, indices_end, 
std::move(sort_keys)); + RadixRecordBatchSorter sorter(indices, std::move(sort_keys)); return sorter.Sort(); } else { - MultipleKeyRecordBatchSorter sorter(indices_begin, indices_end, std::move(sort_keys)); + MultipleKeyRecordBatchSorter sorter(indices, std::move(sort_keys)); return sorter.Sort(); } } diff --git a/cpp/src/arrow/compute/kernels/vector_sort_internal.h b/cpp/src/arrow/compute/kernels/vector_sort_internal.h index 06d911b2c0c..8684cc71b2a 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort_internal.h +++ b/cpp/src/arrow/compute/kernels/vector_sort_internal.h @@ -25,10 +25,13 @@ #include "arrow/array.h" #include "arrow/compute/api_vector.h" +#include "arrow/compute/kernel.h" #include "arrow/compute/kernels/chunked_internal.h" +#include "arrow/compute/ordering.h" #include "arrow/table.h" #include "arrow/type.h" #include "arrow/type_traits.h" +#include "arrow/util/logging_internal.h" namespace arrow::compute::internal { @@ -54,19 +57,16 @@ namespace arrow::compute::internal { // NOTE: std::partition is usually faster than std::stable_partition. 
struct NonStablePartitioner { - template - IndexType* operator()(IndexType* indices_begin, IndexType* indices_end, - Predicate&& pred) { - return std::partition(indices_begin, indices_end, std::forward(pred)); + template + auto operator()(std::span indices, Predicate&& pred) { + return std::ranges::partition(indices, std::forward(pred)); } }; struct StablePartitioner { - template - IndexType* operator()(IndexType* indices_begin, IndexType* indices_end, - Predicate&& pred) { - return std::stable_partition(indices_begin, indices_end, - std::forward(pred)); + template + auto operator()(std::span indices, Predicate&& pred) { + return std::ranges::stable_partition(indices, std::forward(pred)); } }; @@ -106,268 +106,195 @@ int CompareTypeValues(Value&& left, Value&& right, SortOrder order, } template -struct GenericNullPartitionResult { - IndexType* non_nulls_begin; - IndexType* non_nulls_end; - IndexType* nulls_begin; - IndexType* nulls_end; +struct GenericPartitionResultByNullLikeness { + std::span non_null_like_range; + std::span nan_range; + std::span null_range; - IndexType* overall_begin() const { return std::min(nulls_begin, non_nulls_begin); } - - IndexType* overall_end() const { return std::max(nulls_end, non_nulls_end); } - - int64_t non_null_count() const { return non_nulls_end - non_nulls_begin; } + IndexType* overall_begin() const { + return std::min(non_null_like_range.data(), null_range.data()); + } - int64_t null_count() const { return nulls_end - nulls_begin; } + IndexType* overall_end() const { + return std::max(non_null_like_range.data() + non_null_like_range.size(), + null_range.data() + null_range.size()); + } - static GenericNullPartitionResult NoNulls(IndexType* indices_begin, - IndexType* indices_end, - NullPlacement null_placement) { - if (null_placement == NullPlacement::AtStart) { - return {indices_begin, indices_end, indices_begin, indices_begin}; + template + GenericPartitionResultByNullLikeness TranslateTo( + IndexType* indices_begin, 
TargetIndexType* target_indices_begin) const { + return {.non_null_like_range = {(non_null_like_range.data() - indices_begin) + + target_indices_begin, + non_null_like_range.size()}, + .nan_range = {(nan_range.data() - indices_begin) + target_indices_begin, + nan_range.size()}, + .null_range = {(null_range.data() - indices_begin) + target_indices_begin, + null_range.size()}}; + } + + static GenericPartitionResultByNullLikeness fromCounts(std::span indices, + int64_t non_null_like_count, + int64_t nan_count, + int64_t null_count, + NullPlacement null_placement) { + GenericPartitionResultByNullLikeness p; + DCHECK_EQ(non_null_like_count + nan_count + null_count, + static_cast(indices.size())); + if (null_placement == NullPlacement::AtEnd) { + p.non_null_like_range = indices.subspan(0, non_null_like_count); + p.nan_range = indices.subspan(non_null_like_count, nan_count); + p.null_range = indices.subspan(non_null_like_count + nan_count, null_count); } else { - return {indices_begin, indices_end, indices_end, indices_end}; + p.null_range = indices.subspan(0, null_count); + p.nan_range = indices.subspan(null_count, nan_count); + p.non_null_like_range = + indices.subspan(null_count + nan_count, non_null_like_count); } + return p; } +}; - static GenericNullPartitionResult NullsOnly(IndexType* indices_begin, - IndexType* indices_end, - NullPlacement null_placement) { +using PartitionResultByNullLikeness = GenericPartitionResultByNullLikeness; +using ChunkedPartitionResultByNullLikeness = + GenericPartitionResultByNullLikeness; + +struct NullPartition { + std::span non_nulls; + std::span nulls; + + static NullPartition NoNulls(std::span indices, + NullPlacement null_placement) { if (null_placement == NullPlacement::AtStart) { - return {indices_end, indices_end, indices_begin, indices_end}; + return {.non_nulls = indices, .nulls = indices.subspan(0, 0)}; } else { - return {indices_begin, indices_begin, indices_begin, indices_end}; + return {.non_nulls = indices, .nulls = 
indices.subspan(indices.size(), 0)}; } } - static GenericNullPartitionResult NullsAtEnd(IndexType* indices_begin, - IndexType* indices_end, - IndexType* midpoint) { - ARROW_DCHECK_GE(midpoint, indices_begin); - ARROW_DCHECK_LE(midpoint, indices_end); - return {indices_begin, midpoint, midpoint, indices_end}; - } - - static GenericNullPartitionResult NullsAtStart(IndexType* indices_begin, - IndexType* indices_end, - IndexType* midpoint) { - ARROW_DCHECK_GE(midpoint, indices_begin); - ARROW_DCHECK_LE(midpoint, indices_end); - return {midpoint, indices_end, indices_begin, midpoint}; + static NullPartition NullsAtEnd(std::span indices, + std::span null_tail) { + ARROW_DCHECK_GE(null_tail.begin(), indices.begin()); + ARROW_DCHECK_LE(null_tail.begin(), indices.end()); + return {.non_nulls = {indices.begin(), null_tail.begin()}, .nulls = null_tail}; } - template - GenericNullPartitionResult TranslateTo( - IndexType* indices_begin, TargetIndexType* target_indices_begin) const { - return { - (non_nulls_begin - indices_begin) + target_indices_begin, - (non_nulls_end - indices_begin) + target_indices_begin, - (nulls_begin - indices_begin) + target_indices_begin, - (nulls_end - indices_begin) + target_indices_begin, - }; + static NullPartition NullsAtStart(std::span indices, + std::span non_null_tail) { + ARROW_DCHECK_GE(non_null_tail.begin(), indices.begin()); + ARROW_DCHECK_LE(non_null_tail.begin(), indices.end()); + return {.non_nulls = non_null_tail, + .nulls = {indices.begin(), non_null_tail.begin()}}; } }; -using NullPartitionResult = GenericNullPartitionResult; -using ChunkedNullPartitionResult = GenericNullPartitionResult; - // Move nulls (not null-like values) to end of array. 
// // `offset` is used when this is called on a chunk of a chunked array template -NullPartitionResult PartitionNullsOnly(uint64_t* indices_begin, uint64_t* indices_end, - const Array& values, int64_t offset, - NullPlacement null_placement) { +NullPartition PartitionNullsOnly(std::span indices, const Array& values, + int64_t offset, NullPlacement null_placement) { if (values.null_count() == 0) { - return NullPartitionResult::NoNulls(indices_begin, indices_end, null_placement); + return NullPartition::NoNulls(indices, null_placement); } Partitioner partitioner; if (null_placement == NullPlacement::AtStart) { - auto nulls_end = partitioner( - indices_begin, indices_end, - [&values, &offset](uint64_t ind) { return values.IsNull(ind - offset); }); - return NullPartitionResult::NullsAtStart(indices_begin, indices_end, nulls_end); + auto non_null_tail = partitioner(indices, [&values, &offset](uint64_t ind) { + return values.IsNull(ind - offset); + }); + return NullPartition::NullsAtStart(indices, non_null_tail); } else { - auto nulls_begin = partitioner( - indices_begin, indices_end, - [&values, &offset](uint64_t ind) { return !values.IsNull(ind - offset); }); - return NullPartitionResult::NullsAtEnd(indices_begin, indices_end, nulls_begin); + auto null_tail = partitioner(indices, [&values, &offset](uint64_t ind) { + return !values.IsNull(ind - offset); + }); + return NullPartition::NullsAtEnd(indices, null_tail); } } +struct NanPartition { + std::span non_null_like_range; + std::span nan_range; +}; + // Move non-null null-like values to end of array. 
// // `offset` is used when this is called on a chunk of a chunked array template -NullPartitionResult PartitionNullLikes(uint64_t* indices_begin, uint64_t* indices_end, - const ArrayType& values, int64_t offset, - NullPlacement null_placement) { +NanPartition PartitionNans(std::span indices, const ArrayType& values, + int64_t offset, NullPlacement null_placement) { if constexpr (has_null_like_values()) { Partitioner partitioner; if (null_placement == NullPlacement::AtStart) { - auto null_likes_end = - partitioner(indices_begin, indices_end, [&values, &offset](uint64_t ind) { - return std::isnan(values.GetView(ind - offset)); - }); - return NullPartitionResult::NullsAtStart(indices_begin, indices_end, - null_likes_end); + auto non_null_like_tail = partitioner(indices, [&values, &offset](uint64_t ind) { + return std::isnan(values.GetView(ind - offset)); + }); + return NanPartition{.non_null_like_range = non_null_like_tail, + .nan_range = {indices.data(), non_null_like_tail.data()}}; } else { - auto null_likes_begin = - partitioner(indices_begin, indices_end, [&values, &offset](uint64_t ind) { - return !std::isnan(values.GetView(ind - offset)); - }); - return NullPartitionResult::NullsAtEnd(indices_begin, indices_end, - null_likes_begin); + auto nan_tail = partitioner(indices, [&values, &offset](uint64_t ind) { + return !std::isnan(values.GetView(ind - offset)); + }); + return NanPartition{.non_null_like_range = {indices.data(), nan_tail.data()}, + .nan_range = nan_tail}; } } else { - return NullPartitionResult::NoNulls(indices_begin, indices_end, null_placement); - } -} - -// Move nulls to end of array. -// -// `offset` is used when this is called on a chunk of a chunked array -template -NullPartitionResult PartitionNulls(uint64_t* indices_begin, uint64_t* indices_end, - const ArrayType& values, int64_t offset, - NullPlacement null_placement) { - // Partition nulls at start (resp. end), and null-like values just before (resp. 
after) - NullPartitionResult p = PartitionNullsOnly(indices_begin, indices_end, - values, offset, null_placement); - NullPartitionResult q = PartitionNullLikes( - p.non_nulls_begin, p.non_nulls_end, values, offset, null_placement); - return NullPartitionResult{q.non_nulls_begin, q.non_nulls_end, - std::min(q.nulls_begin, p.nulls_begin), - std::max(q.nulls_end, p.nulls_end)}; -} - -// -// Null partitioning on chunked arrays, in two flavors: -// 1) with uint64_t indices and ChunkedArrayResolver -// 2) with CompressedChunkLocation and span of chunks -// - -template -NullPartitionResult PartitionNullsOnly(uint64_t* indices_begin, uint64_t* indices_end, - const ChunkedArrayResolver& resolver, - int64_t null_count, NullPlacement null_placement) { - if (null_count == 0) { - return NullPartitionResult::NoNulls(indices_begin, indices_end, null_placement); - } - Partitioner partitioner; - if (null_placement == NullPlacement::AtStart) { - auto nulls_end = partitioner(indices_begin, indices_end, [&](uint64_t ind) { - const auto chunk = resolver.Resolve(ind); - return chunk.IsNull(); - }); - return NullPartitionResult::NullsAtStart(indices_begin, indices_end, nulls_end); - } else { - auto nulls_begin = partitioner(indices_begin, indices_end, [&](uint64_t ind) { - const auto chunk = resolver.Resolve(ind); - return !chunk.IsNull(); - }); - return NullPartitionResult::NullsAtEnd(indices_begin, indices_end, nulls_begin); - } -} - -template -ChunkedNullPartitionResult PartitionNullsOnly(CompressedChunkLocation* locations_begin, - CompressedChunkLocation* locations_end, - std::span chunks, - int64_t null_count, - NullPlacement null_placement) { - if (null_count == 0) { - return ChunkedNullPartitionResult::NoNulls(locations_begin, locations_end, - null_placement); - } - Partitioner partitioner; - if (null_placement == NullPlacement::AtStart) { - auto nulls_end = - partitioner(locations_begin, locations_end, [&](CompressedChunkLocation loc) { - return chunks[loc.chunk_index()]->IsNull( 
- static_cast(loc.index_in_chunk())); - }); - return ChunkedNullPartitionResult::NullsAtStart(locations_begin, locations_end, - nulls_end); - } else { - auto nulls_begin = - partitioner(locations_begin, locations_end, [&](CompressedChunkLocation loc) { - return !chunks[loc.chunk_index()]->IsNull( - static_cast(loc.index_in_chunk())); - }); - return ChunkedNullPartitionResult::NullsAtEnd(locations_begin, locations_end, - nulls_begin); - } -} - -template -NullPartitionResult PartitionNullLikes(uint64_t* indices_begin, uint64_t* indices_end, - const ChunkedArrayResolver& resolver, - NullPlacement null_placement) { - if constexpr (has_null_like_values()) { - Partitioner partitioner; if (null_placement == NullPlacement::AtStart) { - auto null_likes_end = partitioner(indices_begin, indices_end, [&](uint64_t ind) { - const auto chunk = resolver.Resolve(ind); - return std::isnan(chunk.Value()); - }); - return NullPartitionResult::NullsAtStart(indices_begin, indices_end, - null_likes_end); + return NanPartition{.non_null_like_range = indices, + .nan_range = {indices.data(), indices.data()}}; } else { - auto null_likes_begin = partitioner(indices_begin, indices_end, [&](uint64_t ind) { - const auto chunk = resolver.Resolve(ind); - return !std::isnan(chunk.Value()); - }); - return NullPartitionResult::NullsAtEnd(indices_begin, indices_end, - null_likes_begin); + return NanPartition{.non_null_like_range = indices, + .nan_range = indices.subspan(indices.size())}; } - } else { - return NullPartitionResult::NoNulls(indices_begin, indices_end, null_placement); } } template -NullPartitionResult PartitionNulls(uint64_t* indices_begin, uint64_t* indices_end, - const ChunkedArrayResolver& resolver, - int64_t null_count, NullPlacement null_placement) { +PartitionResultByNullLikeness PartitionNullsAndNans(std::span indices, + const ArrayType& values, + int64_t offset, + NullPlacement null_placement) { // Partition nulls at start (resp. end), and null-like values just before (resp. 
after) - NullPartitionResult p = PartitionNullsOnly( - indices_begin, indices_end, resolver, null_count, null_placement); - NullPartitionResult q = PartitionNullLikes( - p.non_nulls_begin, p.non_nulls_end, resolver, null_placement); - return NullPartitionResult{q.non_nulls_begin, q.non_nulls_end, - std::min(q.nulls_begin, p.nulls_begin), - std::max(q.nulls_end, p.nulls_end)}; + NullPartition p = + PartitionNullsOnly(indices, values, offset, null_placement); + auto q = + PartitionNans(p.non_nulls, values, offset, null_placement); + return PartitionResultByNullLikeness{.non_null_like_range = q.non_null_like_range, + .nan_range = q.nan_range, + .null_range = p.nulls}; } -template -struct GenericMergeImpl { - using MergeNullsFunc = std::function; +template +PartitionResultByNullLikeness PartitionNansOnly(std::span indices, + const ArrayType& values, int64_t offset, + NullPlacement null_placement) { + // Input is treated as null-free: the null range stays empty and only NaNs are moved to the start (resp. end) + NullPartition p = NullPartition::NoNulls(indices, null_placement); + auto q = + PartitionNans(p.non_nulls, values, offset, null_placement); + return PartitionResultByNullLikeness{.non_null_like_range = q.non_null_like_range, + .nan_range = q.nan_range, + .null_range = p.nulls}; +} - using MergeNonNullsFunc = - std::function; +struct ChunkedMergeImpl { + using MergeNonNullsFunc = std::function; - GenericMergeImpl(NullPlacement null_placement, MergeNullsFunc&& merge_nulls, - MergeNonNullsFunc&& merge_non_nulls) - : null_placement_(null_placement), - merge_nulls_(std::move(merge_nulls)), - merge_non_nulls_(std::move(merge_non_nulls)) {} + ChunkedMergeImpl(NullPlacement null_placement, MergeNonNullsFunc&& merge_non_nulls) + : null_placement_(null_placement), merge_non_nulls_(std::move(merge_non_nulls)) {} Status Init(ExecContext* ctx, int64_t temp_indices_length) { - ARROW_ASSIGN_OR_RAISE( - temp_buffer_, - AllocateBuffer(sizeof(IndexType) * temp_indices_length, ctx->memory_pool())); - 
temp_indices_ = reinterpret_cast(temp_buffer_->mutable_data()); + ARROW_ASSIGN_OR_RAISE(temp_buffer_, AllocateBuffer(sizeof(CompressedChunkLocation) * + temp_indices_length, + ctx->memory_pool())); + temp_indices_ = + reinterpret_cast(temp_buffer_->mutable_data()); return Status::OK(); } - NullPartitionResultType Merge(const NullPartitionResultType& left, - const NullPartitionResultType& right, - int64_t null_count) const { + ChunkedPartitionResultByNullLikeness Merge( + const ChunkedPartitionResultByNullLikeness& left, + const ChunkedPartitionResultByNullLikeness& right, int64_t null_count) const { if (null_placement_ == NullPlacement::AtStart) { return MergeNullsAtStart(left, right, null_count); } else { @@ -375,71 +302,82 @@ struct GenericMergeImpl { } } - NullPartitionResultType MergeNullsAtStart(const NullPartitionResultType& left, - const NullPartitionResultType& right, - int64_t null_count) const { + ChunkedPartitionResultByNullLikeness MergeNullsAtStart( + const ChunkedPartitionResultByNullLikeness& left, + const ChunkedPartitionResultByNullLikeness& right, int64_t null_count) const { // Input layout: - // [left nulls .... left non-nulls .... right nulls .... right non-nulls] - ARROW_DCHECK_EQ(left.nulls_end, left.non_nulls_begin); - ARROW_DCHECK_EQ(left.non_nulls_end, right.nulls_begin); - ARROW_DCHECK_EQ(right.nulls_end, right.non_nulls_begin); - - // Mutate the input, stably, to obtain the following layout: - // [left nulls .... right nulls .... left non-nulls .... right non-nulls] - std::rotate(left.non_nulls_begin, right.nulls_begin, right.nulls_end); - - const auto p = NullPartitionResultType::NullsAtStart( - left.nulls_begin, right.non_nulls_end, - left.nulls_begin + left.null_count() + right.null_count()); - - // If the type has null-like values (such as NaN), ensure those plus regular - // nulls are partitioned in the right order. Note this assumes that all - // null-like values (e.g. NaN) are ordered equally. 
- if (p.null_count()) { - merge_nulls_(p.nulls_begin, p.nulls_begin + left.null_count(), p.nulls_end, - temp_indices_, null_count); - } - - // Merge the non-null values into temp area - ARROW_DCHECK_EQ(right.non_nulls_begin - p.non_nulls_begin, left.non_null_count()); - ARROW_DCHECK_EQ(p.non_nulls_end - right.non_nulls_begin, right.non_null_count()); - if (p.non_null_count()) { - merge_non_nulls_(p.non_nulls_begin, right.non_nulls_begin, p.non_nulls_end, + // [left nul .. left nan .. left non-nul .. right nul .. right nan .. right non-nul] + ARROW_DCHECK_EQ(left.null_range.end(), left.nan_range.begin()); + ARROW_DCHECK_EQ(left.nan_range.end(), left.non_null_like_range.begin()); + ARROW_DCHECK_EQ(left.non_null_like_range.end(), right.null_range.begin()); + ARROW_DCHECK_EQ(right.null_range.end(), right.nan_range.begin()); + ARROW_DCHECK_EQ(right.nan_range.end(), right.non_null_like_range.begin()); + + // Mutate the input, stably in two steps, to obtain the following layouts: + // [left nul .. left nan .. right nul .. right nan .. left non-nul .. right non-nul] + std::rotate(left.non_null_like_range.begin(), right.null_range.begin(), + right.nan_range.end()); + + // only use sizes of ranges that are at a different position now + // [left nul .. right nul .. left nan .. right nan .. left non-nulls .. 
right + // non-nulls] this is a no-op if no nan values are present + std::rotate(left.nan_range.begin(), left.nan_range.begin() + left.nan_range.size(), + left.nan_range.begin() + left.nan_range.size() + right.null_range.size()); + + std::span full_span{left.overall_begin(), + right.overall_end()}; + const auto p = ChunkedPartitionResultByNullLikeness::fromCounts( + full_span, left.non_null_like_range.size() + right.non_null_like_range.size(), + left.nan_range.size() + right.nan_range.size(), + left.null_range.size() + right.null_range.size(), NullPlacement::AtStart); + + if (!p.non_null_like_range.empty()) { + merge_non_nulls_(p.non_null_like_range.data(), + p.non_null_like_range.data() + left.non_null_like_range.size(), + p.non_null_like_range.data() + p.non_null_like_range.size(), temp_indices_); } return p; } - NullPartitionResultType MergeNullsAtEnd(const NullPartitionResultType& left, - const NullPartitionResultType& right, - int64_t null_count) const { + ChunkedPartitionResultByNullLikeness MergeNullsAtEnd( + const ChunkedPartitionResultByNullLikeness& left, + const ChunkedPartitionResultByNullLikeness& right, int64_t null_count) const { // Input layout: - // [left non-nulls .... left nulls .... right non-nulls .... right nulls] - ARROW_DCHECK_EQ(left.non_nulls_end, left.nulls_begin); - ARROW_DCHECK_EQ(left.nulls_end, right.non_nulls_begin); - ARROW_DCHECK_EQ(right.non_nulls_end, right.nulls_begin); - - // Mutate the input, stably, to obtain the following layout: - // [left non-nulls .... right non-nulls .... left nulls .... right nulls] - std::rotate(left.nulls_begin, right.non_nulls_begin, right.non_nulls_end); - - const auto p = NullPartitionResultType::NullsAtEnd( - left.non_nulls_begin, right.nulls_end, - left.non_nulls_begin + left.non_null_count() + right.non_null_count()); - - // If the type has null-like values (such as NaN), ensure those plus regular - // nulls are partitioned in the right order. 
Note this assumes that all - // null-like values (e.g. NaN) are ordered equally. - if (p.null_count()) { - merge_nulls_(p.nulls_begin, p.nulls_begin + left.null_count(), p.nulls_end, - temp_indices_, null_count); - } + // [left non-nul .. left nan .. left nul .. right non-nul .. right nan .. right nulls] + ARROW_DCHECK_EQ(left.non_null_like_range.end(), left.nan_range.begin()); + ARROW_DCHECK_EQ(left.nan_range.end(), left.null_range.begin()); + ARROW_DCHECK_EQ(left.null_range.end(), right.non_null_like_range.begin()); + ARROW_DCHECK_EQ(right.non_null_like_range.end(), right.nan_range.begin()); + ARROW_DCHECK_EQ(right.nan_range.end(), right.null_range.begin()); + + // Mutate the input, stably in two steps, to obtain the following layouts: + // [left non-nul .. right non-nul .. left nan .. left nul .. right nan .. right nul] + std::rotate(left.nan_range.begin(), right.non_null_like_range.begin(), + right.non_null_like_range.end()); + + // only use sizes of ranges that are at a different position now + // [left non-nul .. right non-nul .. left nan .. right nan .. left nul .. 
right nul] + // this is a no-op if no nan values are present + auto new_left_null_range_begin = + left.non_null_like_range.begin() + left.non_null_like_range.size() + + right.non_null_like_range.size() + left.nan_range.size(); + std::rotate( + new_left_null_range_begin, new_left_null_range_begin + left.null_range.size(), + new_left_null_range_begin + left.null_range.size() + right.nan_range.size()); + + std::span full_span{left.overall_begin(), + right.overall_end()}; + const auto p = ChunkedPartitionResultByNullLikeness::fromCounts( + full_span, left.non_null_like_range.size() + right.non_null_like_range.size(), + left.nan_range.size() + right.nan_range.size(), + left.null_range.size() + right.null_range.size(), NullPlacement::AtEnd); // Merge the non-null values into temp area - ARROW_DCHECK_EQ(left.non_nulls_end - p.non_nulls_begin, left.non_null_count()); - ARROW_DCHECK_EQ(p.non_nulls_end - left.non_nulls_end, right.non_null_count()); - if (p.non_null_count()) { - merge_non_nulls_(p.non_nulls_begin, left.non_nulls_end, p.non_nulls_end, + if (!p.non_null_like_range.empty()) { + merge_non_nulls_(p.non_null_like_range.data(), + p.non_null_like_range.data() + left.non_null_like_range.size(), + p.non_null_like_range.data() + p.non_null_like_range.size(), temp_indices_); } return p; @@ -447,41 +385,36 @@ struct GenericMergeImpl { private: NullPlacement null_placement_; - MergeNullsFunc merge_nulls_; MergeNonNullsFunc merge_non_nulls_; std::unique_ptr temp_buffer_; - IndexType* temp_indices_ = nullptr; + CompressedChunkLocation* temp_indices_ = nullptr; }; -using MergeImpl = GenericMergeImpl; -using ChunkedMergeImpl = - GenericMergeImpl; - // TODO make this usable if indices are non trivial on input // (see ConcreteRecordBatchColumnSorter) // `offset` is used when this is called on a chunk of a chunked array -using ArraySortFunc = std::function( - uint64_t* indices_begin, uint64_t* indices_end, const Array& values, int64_t offset, +using ArraySortFunc = std::function( 
+ std::span indices, const Array& values, int64_t offset, const ArraySortOptions& options, ExecContext* ctx)>; Result GetArraySorter(const DataType& type); -Result SortChunkedArray(ExecContext* ctx, uint64_t* indices_begin, - uint64_t* indices_end, - const ChunkedArray& chunked_array, - SortOrder sort_order, - NullPlacement null_placement); +Result SortChunkedArray(ExecContext* ctx, + std::span indices, + const ChunkedArray& chunked_array, + SortOrder sort_order, + NullPlacement null_placement); -Result SortChunkedArray( - ExecContext* ctx, uint64_t* indices_begin, uint64_t* indices_end, +Result SortChunkedArray( + ExecContext* ctx, std::span indices, const std::shared_ptr& physical_type, const ArrayVector& physical_chunks, SortOrder sort_order, NullPlacement null_placement); -Result SortStructArray(ExecContext* ctx, uint64_t* indices_begin, - uint64_t* indices_end, - const StructArray& array, - SortOrder sort_order, - NullPlacement null_placement); +Result SortStructArray(ExecContext* ctx, + std::span indices, + const StructArray& array, + SortOrder sort_order, + NullPlacement null_placement); // ---------------------------------------------------------------------- // Helpers for Sort/SelectK/Rank implementations