Skip to content
Open
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,6 @@ add_library(
src/join/mixed_join_semi.cu
src/join/mixed_join_size_kernel.cu
src/join/mixed_join_size_kernel_nulls.cu
src/join/semi_join.cu
src/join/sort_merge_join.cu
src/json/json_path.cu
src/lists/contains.cu
Expand Down
67 changes: 0 additions & 67 deletions cpp/include/cudf/join/join.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,73 +201,6 @@ full_join(cudf::table_view const& left_keys,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Returns a vector of row indices corresponding to a left semi-join
* between the specified tables.
*
* @deprecated Use the object-oriented filtered_join `cudf::filtered_join::semi_join` instead
*
* The returned vector contains the row indices from the left table
* for which there is a matching row in the right table.
*
* @code{.pseudo}
* TableA: {{0, 1, 2}}
* TableB: {{1, 2, 3}}
* Result: {1, 2}
* @endcode
*
* @param left_keys The left table
* @param right_keys The right table
* @param compare_nulls Controls whether null join-key values should match or not
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned vector's device memory
*
* @return A vector `left_indices` that can be used to construct
* the result of performing a left semi join between two tables with
* `left_keys` and `right_keys` as the join keys.
*/
[[deprecated]] std::unique_ptr<rmm::device_uvector<size_type>> left_semi_join(
cudf::table_view const& left_keys,
cudf::table_view const& right_keys,
null_equality compare_nulls = null_equality::EQUAL,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Returns a vector of row indices corresponding to a left anti join
* between the specified tables.
*
* @deprecated Use the object-oriented filtered_join `cudf::filtered_join::anti_join` instead
*
* The returned vector contains the row indices from the left table
* for which there is no matching row in the right table.
*
* @code{.pseudo}
* TableA: {{0, 1, 2}}
* TableB: {{1, 2, 3}}
* Result: {0}
* @endcode
*
* @throw cudf::logic_error if the number of columns in either `left_keys` or `right_keys` is 0
*
* @param[in] left_keys The left table
* @param[in] right_keys The right table
* @param[in] compare_nulls Controls whether null join-key values
* should match or not.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned vector's device memory
*
* @return A vector `left_indices` that can be used to construct
* the result of performing a left anti join between two tables with
* `left_keys` and `right_keys` as the join keys.
*/
[[deprecated]] std::unique_ptr<rmm::device_uvector<size_type>> left_anti_join(
cudf::table_view const& left_keys,
cudf::table_view const& right_keys,
null_equality compare_nulls = null_equality::EQUAL,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Performs a cross join on two tables (`left`, `right`)
*
Expand Down
86 changes: 0 additions & 86 deletions cpp/src/join/semi_join.cu

This file was deleted.

15 changes: 11 additions & 4 deletions java/src/main/native/src/TableJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <cudf/io/parquet.hpp>
#include <cudf/join/conditional_join.hpp>
#include <cudf/join/distinct_hash_join.hpp>
#include <cudf/join/filtered_join.hpp>
#include <cudf/join/hash_join.hpp>
#include <cudf/join/join.hpp>
#include <cudf/join/mixed_join.hpp>
Expand Down Expand Up @@ -3511,13 +3512,16 @@ Java_ai_rapids_cudf_Table_mixedFullJoinGatherMaps(JNIEnv* env,
// JNI entry point for `ai.rapids.cudf.Table.leftSemiJoinGatherMap`.
//
// Forwards the left/right key handles and the null-equality flag to
// `cudf::jni::join_gather_single_map`, together with a callable that computes the join.
//
// NOTE(review): this span is a rendered diff, so two versions of the callable are
// interleaved below. The capture-less lambda calling `cudf::left_semi_join` appears to be the
// removed version; the `[load_factor]` lambda appears to be its replacement — confirm against
// the actual file before editing.
JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_leftSemiJoinGatherMap(
JNIEnv* env, jclass, jlong j_left_keys, jlong j_right_keys, jboolean compare_nulls_equal)
{
// Captured by the lambda and passed as the last argument of the `cudf::filtered_join` constructor.
double constexpr load_factor = 0.5;
return cudf::jni::join_gather_single_map(
env,
j_left_keys,
j_right_keys,
compare_nulls_equal,
[](cudf::table_view const& left, cudf::table_view const& right, cudf::null_equality nulleq) {
return cudf::left_semi_join(left, right, nulleq);
[load_factor](
cudf::table_view const& left, cudf::table_view const& right, cudf::null_equality nulleq) {
// The filtered_join is constructed from the right table with `set_as_build_table::RIGHT`;
// the left table is then passed to `semi_join`.
cudf::filtered_join obj(right, nulleq, cudf::set_as_build_table::RIGHT, load_factor);
return obj.semi_join(left);
});
}

Expand Down Expand Up @@ -3608,13 +3612,16 @@ Java_ai_rapids_cudf_Table_mixedLeftSemiJoinGatherMap(JNIEnv* env,
// JNI entry point for `ai.rapids.cudf.Table.leftAntiJoinGatherMap`.
//
// Forwards the left/right key handles and the null-equality flag to
// `cudf::jni::join_gather_single_map`, together with a callable that computes the join.
//
// NOTE(review): this span is a rendered diff, so two versions of the callable are
// interleaved below. The capture-less lambda calling `cudf::left_anti_join` appears to be the
// removed version; the `[load_factor]` lambda appears to be its replacement — confirm against
// the actual file before editing.
JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_leftAntiJoinGatherMap(
JNIEnv* env, jclass, jlong j_left_keys, jlong j_right_keys, jboolean compare_nulls_equal)
{
// Captured by the lambda and passed as the last argument of the `cudf::filtered_join` constructor.
double constexpr load_factor = 0.5;
return cudf::jni::join_gather_single_map(
env,
j_left_keys,
j_right_keys,
compare_nulls_equal,
[](cudf::table_view const& left, cudf::table_view const& right, cudf::null_equality nulleq) {
return cudf::left_anti_join(left, right, nulleq);
[load_factor](
cudf::table_view const& left, cudf::table_view const& right, cudf::null_equality nulleq) {
// The filtered_join is constructed from the right table with `set_as_build_table::RIGHT`;
// the left table is then passed to `anti_join`.
cudf::filtered_join obj(right, nulleq, cudf::set_as_build_table::RIGHT, load_factor);
return obj.anti_join(left);
});
}

Expand Down
5 changes: 5 additions & 0 deletions python/pylibcudf/pylibcudf/join.pxd
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0

from libcpp.memory cimport unique_ptr
from pylibcudf.libcudf cimport join as cpp_join
from pylibcudf.libcudf.types cimport null_equality
from rmm.pylibrmm.stream cimport Stream
from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
Expand Down Expand Up @@ -148,3 +150,6 @@ cpdef Column mixed_left_anti_join(
Stream stream=*,
DeviceMemoryResource mr=*,
)

# Extension type declaration wrapping a libcudf `filtered_join` object.
cdef class FilteredJoin:
# Owning `unique_ptr` to the underlying C++ `cpp_join.filtered_join` instance.
cdef unique_ptr[cpp_join.filtered_join] c_obj
24 changes: 23 additions & 1 deletion python/pylibcudf/pylibcudf/join.pyi
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0

from rmm.pylibrmm import Stream
Expand Down Expand Up @@ -135,3 +135,25 @@ def mixed_left_anti_join(
stream: Stream | None = None,
mr: DeviceMemoryResource | None = None,
) -> Column: ...

# Type stub for `FilteredJoin`: constructed from a build table, it exposes `semi_join` and
# `anti_join`, each of which takes a probe table and returns a `Column`.
class FilteredJoin:
# Constructor. `stream` is optional and `load_factor` defaults to 0.5.
# NOTE(review): `reuse_tbl` is typed as a plain `int` — presumably an enum value selecting
# which table is used as the build side; confirm against the implementation.
def __init__(
self,
build: Table,
compare_nulls: NullEquality,
reuse_tbl: int,
stream: Stream | None = None,
load_factor: float = 0.5,
) -> None: ...
# Semi join against `probe`; `stream` and `mr` are optional. Returns a `Column`.
def semi_join(
self,
probe: Table,
stream: Stream | None = None,
mr: DeviceMemoryResource | None = None,
) -> Column: ...
# Anti join against `probe`; `stream` and `mr` are optional. Returns a `Column`.
def anti_join(
self,
probe: Table,
stream: Stream | None = None,
mr: DeviceMemoryResource | None = None,
) -> Column: ...
Loading
Loading