Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
e5a83c9
Implement benchmarks and tests for SearchSorted functionality
Alex-PLACET Apr 2, 2026
e4b2e36
Refactor vector_search_sorted kernel to use ArrayData and add benchma…
Alex-PLACET Apr 2, 2026
17b666f
Enhance documentation for search_sorted kernel with detailed implemen…
Alex-PLACET Apr 2, 2026
881f5b8
Refactor vector_search_sorted kernel to improve null handling and uti…
Alex-PLACET Apr 3, 2026
56f6918
Refactor search_sorted kernel: improve error messages and add compreh…
Alex-PLACET Apr 7, 2026
b3c4e50
Formatting
Alex-PLACET Apr 8, 2026
20372d1
Refactor vector_search_sorted kernel: enhance readability and add noe…
Alex-PLACET Apr 8, 2026
1c0462c
Refactor search_sorted documentation: adjust indentation for clarity
Alex-PLACET Apr 8, 2026
bbac1e5
Implement chunked array support in search_sorted: enhance validation …
Alex-PLACET Apr 22, 2026
a56bcfd
Refactor CheckSimpleScalarSearchSorted: support multiple needles and …
Alex-PLACET Apr 22, 2026
8ebd6ec
Add tests for chunked run-end encoded values and needles in SearchSorted
Alex-PLACET Apr 22, 2026
660182e
formatting
Alex-PLACET Apr 23, 2026
277c76a
Refactor search_sorted: extract null counting logic into reusable fun…
Alex-PLACET Apr 23, 2026
925025c
Formatting
Alex-PLACET Apr 23, 2026
92ea85d
Fix benchmark
Alex-PLACET Apr 24, 2026
dfaa478
Refactor SearchSorted tests: consolidate assertions into reusable Che…
Alex-PLACET Apr 24, 2026
645fd23
Refactor SearchSorted tests: rename and enhance scalar search functio…
Alex-PLACET Apr 24, 2026
57d34c5
Update SearchSorted tests: modify input values and needles for consis…
Alex-PLACET Apr 24, 2026
aa95c22
Add tests for sliced ChunkedRunEndEncodedValues and handle all-null c…
Alex-PLACET Apr 24, 2026
129a743
Add FindNonNullValuesRange method annotation and new test for Chunked…
Alex-PLACET Apr 24, 2026
94f7577
Remove redundant check for RUN_END_ENCODED type in ReadChunkValue method
Alex-PLACET Apr 24, 2026
866b93c
Add chunked array support for search sorted operations and enhance te…
Alex-PLACET Apr 27, 2026
a5b2226
Formatting
Alex-PLACET Apr 27, 2026
d7433cb
Add support for run-end encoded needles in search sorted operations
Alex-PLACET Apr 27, 2026
5a182e0
Refactor search sorted output handling to use InsertionIndexBuilder f…
Alex-PLACET May 4, 2026
b28285d
Enhance search sorted functionality by adding tests for sliced run-en…
Alex-PLACET May 4, 2026
a9da407
Fix
Alex-PLACET May 7, 2026
c524341
Refactor search sorted to dispatch on physical types
Alex-PLACET Jun 5, 2026
34eddc7
fix
Alex-PLACET Jun 5, 2026
60e7037
Reject interspersed nulls in search_sorted values
Alex-PLACET Jun 10, 2026
2bb3e7d
fix casting
Alex-PLACET Jun 15, 2026
e702161
formatting
Alex-PLACET Jun 15, 2026
161b0c0
Add documentation and address comments
Alex-PLACET Jun 29, 2026
3a40b63
wip
Alex-PLACET Jun 29, 2026
9943c43
fix conversion
Alex-PLACET Jun 30, 2026
8c9ec3f
formatting
Alex-PLACET Jul 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,7 @@ if(ARROW_COMPUTE)
compute/kernels/vector_rank.cc
compute/kernels/vector_replace.cc
compute/kernels/vector_run_end_encode.cc
compute/kernels/vector_search_sorted.cc
compute/kernels/vector_select_k.cc
compute/kernels/vector_sort.cc
compute/kernels/vector_statistics.cc
Expand Down
34 changes: 32 additions & 2 deletions cpp/src/arrow/compute/api_vector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ using compute::FilterOptions;
using compute::NullPlacement;
using compute::RankOptions;
using compute::RankQuantileOptions;
using compute::SearchSortedOptions;

template <>
struct EnumTraits<FilterOptions::NullSelectionBehavior>
Expand Down Expand Up @@ -82,6 +83,22 @@ struct EnumTraits<DictionaryEncodeOptions::NullEncodingBehavior>
return "<INVALID>";
}
};

// Enum reflection traits for SearchSortedOptions::Side. The enumerators that
// count as valid are the ones listed as BasicEnumTraits template arguments.
template <>
struct EnumTraits<SearchSortedOptions::Side>
    : BasicEnumTraits<SearchSortedOptions::Side, SearchSortedOptions::Left,
                      SearchSortedOptions::Right> {
  // Qualified name of the enum type.
  static std::string name() { return "SearchSortedOptions::Side"; }

  // Display string for `value`. Anything that is not one of the declared
  // enumerators keeps the "<INVALID>" fallback.
  static std::string value_name(SearchSortedOptions::Side value) {
    const char* label = "<INVALID>";
    // No `default:` label, so the compiler can still flag an unhandled
    // enumerator if Side ever grows.
    switch (value) {
      case SearchSortedOptions::Left:
        label = "Left";
        break;
      case SearchSortedOptions::Right:
        label = "Right";
        break;
    }
    return label;
  }
};
template <>
struct EnumTraits<RankOptions::Tiebreaker>
: BasicEnumTraits<RankOptions::Tiebreaker, RankOptions::Min, RankOptions::Max,
Expand Down Expand Up @@ -125,6 +142,8 @@ static auto kRunEndEncodeOptionsType = GetFunctionOptionsType<RunEndEncodeOption
static auto kArraySortOptionsType = GetFunctionOptionsType<ArraySortOptions>(
DataMember("order", &ArraySortOptions::order),
DataMember("null_placement", &ArraySortOptions::null_placement));
// Options-type descriptor for SearchSortedOptions. `side` is the only data
// member registered with it, under the name "side".
static auto kSearchSortedOptionsType = GetFunctionOptionsType<SearchSortedOptions>(
DataMember("side", &SearchSortedOptions::side));
static auto kSortOptionsType = GetFunctionOptionsType<SortOptions>(
CoercedDataMember("sort_keys", &SortOptions::sort_keys, &SortOptions::GetSortKeys));
static auto kPartitionNthOptionsType = GetFunctionOptionsType<PartitionNthOptions>(
Expand Down Expand Up @@ -182,12 +201,15 @@ ArraySortOptions::ArraySortOptions(SortOrder order, NullPlacement null_placement
null_placement(null_placement) {}
constexpr char ArraySortOptions::kTypeName[];

// Constructs search_sorted options: passes kSearchSortedOptionsType to the
// FunctionOptions base and stores the requested insertion side.
SearchSortedOptions::SearchSortedOptions(SearchSortedOptions::Side side)
: FunctionOptions(internal::kSearchSortedOptionsType), side(side) {}
// Out-of-class definition of the static constexpr kTypeName member, following
// the same pattern as the other options types in this file.
constexpr char SearchSortedOptions::kTypeName[];

ARROW_SUPPRESS_DEPRECATION_WARNING
// Builds sort options from an explicit list of sort keys (moved into the
// member). null_placement is left unset (std::nullopt) by this overload.
SortOptions::SortOptions(std::vector<SortKey> sort_keys)
: FunctionOptions(internal::kSortOptionsType),
sort_keys(std::move(sort_keys)),
null_placement(std::nullopt) {}

SortOptions::SortOptions(std::vector<SortKey> sort_keys,
std::optional<NullPlacement> null_placement)
: FunctionOptions(internal::kSortOptionsType),
Expand All @@ -196,7 +218,9 @@ SortOptions::SortOptions(std::vector<SortKey> sort_keys,
SortOptions::SortOptions(const Ordering& ordering)
: FunctionOptions(internal::kSortOptionsType),
sort_keys(ordering.sort_keys()),
null_placement(ordering.null_placement()) {}
null_placement(std::nullopt) {
null_placement = ordering.null_placement();
Comment on lines +221 to +222

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure why this change was necessary?

}
constexpr char SortOptions::kTypeName[];
ARROW_UNSUPPRESS_DEPRECATION_WARNING

Expand Down Expand Up @@ -277,6 +301,7 @@ void RegisterVectorOptions(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunctionOptionsType(kDictionaryEncodeOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kRunEndEncodeOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kArraySortOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kSearchSortedOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kSortOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kPartitionNthOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kSelectKOptionsType));
Expand Down Expand Up @@ -318,6 +343,11 @@ Result<std::shared_ptr<Array>> SelectKUnstable(const Datum& datum,
return result.make_array();
}

// Convenience wrapper: invokes the registered "search_sorted" compute function
// with `values` and `needles` (in that order) and forwards `options` and `ctx`
// unchanged.
Result<Datum> SearchSorted(const Datum& values, const Datum& needles,
                           const SearchSortedOptions& options, ExecContext* ctx) {
  const std::vector<Datum> call_args{values, needles};
  return CallFunction("search_sorted", call_args, &options, ctx);
}

Result<Datum> ReplaceWithMask(const Datum& values, const Datum& mask,
const Datum& replacements, ExecContext* ctx) {
return CallFunction("replace_with_mask", {values, mask, replacements}, ctx);
Expand Down
38 changes: 38 additions & 0 deletions cpp/src/arrow/compute/api_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,21 @@ class ARROW_EXPORT ArraySortOptions : public FunctionOptions {
NullPlacement null_placement;
};

/// \brief Options for the search_sorted function (see SearchSorted()).
class ARROW_EXPORT SearchSortedOptions : public FunctionOptions {
public:
/// Which insertion point to report for each needle.
enum Side {
/// Report the leftmost insertion point.
Left,
/// Report the rightmost insertion point.
Right,
};

/// Construct options; `side` defaults to Side::Left.
explicit SearchSortedOptions(Side side = Side::Left);
static constexpr const char kTypeName[] = "SearchSortedOptions";
/// Default options, equivalent to SearchSortedOptions(Side::Left).
static SearchSortedOptions Defaults() { return SearchSortedOptions(); }

/// Whether to return the leftmost or rightmost insertion point.
Side side;
};

class ARROW_EXPORT SortOptions : public FunctionOptions {
public:
explicit SortOptions(std::vector<SortKey> sort_keys = {});
Expand Down Expand Up @@ -598,6 +613,29 @@ Result<std::shared_ptr<Array>> SelectKUnstable(const Datum& datum,
const SelectKOptions& options,
ExecContext* ctx = NULLPTR);

/// \brief Find insertion indices that preserve sorted order.
///
/// The `values` datum must be a plain array, chunked array, or run-end encoded
/// array (including chunked run-end encoded) sorted in ascending order.
/// `needles` may be a scalar, plain array, chunked array, or run-end encoded
/// array (including chunked run-end encoded) whose logical value type matches
/// `values`.
///
/// Nulls in `values` are supported when clustered entirely at the start or the
/// end of the sorted array. Non-null needles are matched only against the
/// non-null portion of `values`. Null needles yield null outputs.
///
/// \param[in] values sorted array to search within
/// \param[in] needles scalar or array-like values to search for
/// \param[in] options selects left or right insertion semantics; when omitted,
///            SearchSortedOptions::Defaults() is used, i.e. Side::Left
/// \param[in] ctx the function execution context, optional
/// \return insertion indices as uint64 scalar or array
ARROW_EXPORT
Result<Datum> SearchSorted(
const Datum& values, const Datum& needles,
const SearchSortedOptions& options = SearchSortedOptions::Defaults(),
ExecContext* ctx = NULLPTR);

/// \brief Return the indices that would sort an array.
///
/// Perform an indirect sort of array. The output array will contain
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/compute/initialize.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Status RegisterComputeKernels() {
internal::RegisterVectorNested(registry);
internal::RegisterVectorRank(registry);
internal::RegisterVectorReplace(registry);
internal::RegisterVectorSearchSorted(registry);
internal::RegisterVectorSelectK(registry);
internal::RegisterVectorSort(registry);
internal::RegisterVectorRunEndEncode(registry);
Expand Down
8 changes: 8 additions & 0 deletions cpp/src/arrow/compute/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,13 @@ add_arrow_compute_test(vector_sort_test
arrow_compute_kernels_testing
arrow_compute_testing)

# Test target built from vector_search_sorted_test.cc; links the same extra
# testing libraries as the neighbouring vector kernel tests.
add_arrow_compute_test(vector_search_sorted_test
SOURCES
vector_search_sorted_test.cc
EXTRA_LINK_LIBS
arrow_compute_kernels_testing
arrow_compute_testing)

add_arrow_compute_test(vector_selection_test
SOURCES
vector_selection_test.cc
Expand All @@ -141,6 +148,7 @@ add_arrow_compute_benchmark(vector_sort_benchmark)
add_arrow_compute_benchmark(vector_partition_benchmark)
add_arrow_compute_benchmark(vector_topk_benchmark)
add_arrow_compute_benchmark(vector_replace_benchmark)
add_arrow_compute_benchmark(vector_search_sorted_benchmark)
add_arrow_compute_benchmark(vector_selection_benchmark)

# ----------------------------------------------------------------------
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/compute/kernels/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ vector_kernel_benchmarks = [
'vector_partition_benchmark',
'vector_topk_benchmark',
'vector_replace_benchmark',
'vector_search_sorted_benchmark',
'vector_selection_benchmark',
]

Expand Down
Loading
Loading