Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,6 @@ add_library(
src/join/mixed_join_semi.cu
src/join/mixed_join_size_kernel.cu
src/join/mixed_join_size_kernel_nulls.cu
src/join/semi_join.cu
src/join/sort_merge_join.cu
src/json/json_path.cu
src/lists/contains.cu
Expand Down
67 changes: 0 additions & 67 deletions cpp/include/cudf/join/join.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,73 +201,6 @@ full_join(cudf::table_view const& left_keys,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Returns a vector of row indices corresponding to a left semi-join
* between the specified tables.
*
* @deprecated Use the object-oriented filtered_join `cudf::filtered_join::semi_join` instead
*
* The returned vector contains the row indices from the left table
* for which there is a matching row in the right table.
*
* @code{.pseudo}
* TableA: {{0, 1, 2}}
* TableB: {{1, 2, 3}}
* Result: {1, 2}
* @endcode
*
* @param left_keys The left table
* @param right_keys The right table
* @param compare_nulls Controls whether null join-key values should match or not
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned vector's device memory
*
* @return A vector `left_indices` that can be used to construct
* the result of performing a left semi join between two tables with
* `left_keys` and `right_keys` as the join keys.
*/
[[deprecated]] std::unique_ptr<rmm::device_uvector<size_type>> left_semi_join(
cudf::table_view const& left_keys,
cudf::table_view const& right_keys,
null_equality compare_nulls = null_equality::EQUAL,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Returns a vector of row indices corresponding to a left anti join
* between the specified tables.
*
* @deprecated Use the object-oriented filtered_join `cudf::filtered_join::anti_join` instead
*
* The returned vector contains the row indices from the left table
* for which there is no matching row in the right table.
*
* @code{.pseudo}
* TableA: {{0, 1, 2}}
* TableB: {{1, 2, 3}}
* Result: {0}
* @endcode
*
* @throw cudf::logic_error if the number of columns in either `left_keys` or `right_keys` is 0
*
* @param[in] left_keys The left table
* @param[in] right_keys The right table
* @param[in] compare_nulls controls whether null join-key values
* should match or not.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned vector's device memory
*
* @return A vector `left_indices` that can be used to construct
* the result of performing a left anti join between two tables with
* `left_keys` and `right_keys` as the join keys.
*/
[[deprecated]] std::unique_ptr<rmm::device_uvector<size_type>> left_anti_join(
cudf::table_view const& left_keys,
cudf::table_view const& right_keys,
null_equality compare_nulls = null_equality::EQUAL,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Performs a cross join on two tables (`left`, `right`)
*
Expand Down
86 changes: 0 additions & 86 deletions cpp/src/join/semi_join.cu

This file was deleted.

15 changes: 11 additions & 4 deletions java/src/main/native/src/TableJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <cudf/io/parquet.hpp>
#include <cudf/join/conditional_join.hpp>
#include <cudf/join/distinct_hash_join.hpp>
#include <cudf/join/filtered_join.hpp>
#include <cudf/join/hash_join.hpp>
#include <cudf/join/join.hpp>
#include <cudf/join/mixed_join.hpp>
Expand Down Expand Up @@ -3511,13 +3512,16 @@ Java_ai_rapids_cudf_Table_mixedFullJoinGatherMaps(JNIEnv* env,
// JNI entry point for the left semi join gather map: forwards the two key-table handles and the
// null-equality flag to cudf::jni::join_gather_single_map together with the join callback.
// NOTE(review): this diff hunk shows two callbacks back to back -- the capture-less lambda that
// calls cudf::left_semi_join appears to be the removed version and the `[load_factor]` lambda
// its replacement; confirm against the raw diff.
JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_leftSemiJoinGatherMap(
JNIEnv* env, jclass, jlong j_left_keys, jlong j_right_keys, jboolean compare_nulls_equal)
{
// Load factor handed to the cudf::filtered_join constructor below.
double constexpr load_factor = 0.5;
return cudf::jni::join_gather_single_map(
env,
j_left_keys,
j_right_keys,
compare_nulls_equal,
[](cudf::table_view const& left, cudf::table_view const& right, cudf::null_equality nulleq) {
return cudf::left_semi_join(left, right, nulleq);
[load_factor](
cudf::table_view const& left, cudf::table_view const& right, cudf::null_equality nulleq) {
// Construct the filtered_join from the right keys, then run semi_join with the left keys.
cudf::filtered_join obj(right, nulleq, cudf::set_as_build_table::RIGHT, load_factor);
return obj.semi_join(left);
});
}

Expand Down Expand Up @@ -3608,13 +3612,16 @@ Java_ai_rapids_cudf_Table_mixedLeftSemiJoinGatherMap(JNIEnv* env,
// JNI entry point for the left anti join gather map: forwards the two key-table handles and the
// null-equality flag to cudf::jni::join_gather_single_map together with the join callback.
// NOTE(review): this diff hunk shows two callbacks back to back -- the capture-less lambda that
// calls cudf::left_anti_join appears to be the removed version and the `[load_factor]` lambda
// its replacement; confirm against the raw diff.
JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_leftAntiJoinGatherMap(
JNIEnv* env, jclass, jlong j_left_keys, jlong j_right_keys, jboolean compare_nulls_equal)
{
// Load factor handed to the cudf::filtered_join constructor below.
double constexpr load_factor = 0.5;
return cudf::jni::join_gather_single_map(
env,
j_left_keys,
j_right_keys,
compare_nulls_equal,
[](cudf::table_view const& left, cudf::table_view const& right, cudf::null_equality nulleq) {
return cudf::left_anti_join(left, right, nulleq);
[load_factor](
cudf::table_view const& left, cudf::table_view const& right, cudf::null_equality nulleq) {
// Construct the filtered_join from the right keys, then run anti_join with the left keys.
cudf::filtered_join obj(right, nulleq, cudf::set_as_build_table::RIGHT, load_factor);
return obj.anti_join(left);
});
}

Expand Down
5 changes: 5 additions & 0 deletions python/pylibcudf/pylibcudf/join.pxd
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0

from libcpp.memory cimport unique_ptr
from pylibcudf.libcudf cimport join as cpp_join
from pylibcudf.libcudf.types cimport null_equality
from rmm.pylibrmm.stream cimport Stream
from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
Expand Down Expand Up @@ -148,3 +150,6 @@ cpdef Column mixed_left_anti_join(
Stream stream=*,
DeviceMemoryResource mr=*,
)

# Extension type declaration whose only declared attribute is a unique_ptr to a
# libcudf `filtered_join` object (reached through the `cpp_join` cimport).
cdef class FilteredJoin:
cdef unique_ptr[cpp_join.filtered_join] c_obj
30 changes: 29 additions & 1 deletion python/pylibcudf/pylibcudf/join.pyi
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0

from enum import IntEnum

from rmm.pylibrmm import Stream
from rmm.pylibrmm.memory_resource import DeviceMemoryResource

Expand All @@ -9,6 +11,10 @@ from pylibcudf.expressions import Expression
from pylibcudf.table import Table
from pylibcudf.types import NullEquality

# Integer enum stub with two members, LEFT and RIGHT; it is the declared type of
# the `reuse_tbl` parameter of FilteredJoin.__init__ in this stub file.
class SetAsBuildTable(IntEnum):
LEFT = ...
RIGHT = ...

def inner_join(
left_keys: Table,
right_keys: Table,
Expand Down Expand Up @@ -135,3 +141,25 @@ def mixed_left_anti_join(
stream: Stream | None = None,
mr: DeviceMemoryResource | None = None,
) -> Column: ...

# Type stub for the FilteredJoin class.
# An instance is created from a `build` table, a NullEquality value, a
# SetAsBuildTable value and a float load factor (plus an optional stream).
# Both `semi_join` and `anti_join` take a `probe` table with optional stream and
# memory resource, and are declared to return a Column.
class FilteredJoin:
def __init__(
self,
build: Table,
compare_nulls: NullEquality,
reuse_tbl: SetAsBuildTable,
load_factor: float,
stream: Stream | None = None,
) -> None: ...
def semi_join(
self,
probe: Table,
stream: Stream | None = None,
mr: DeviceMemoryResource | None = None,
) -> Column: ...
def anti_join(
self,
probe: Table,
stream: Stream | None = None,
mr: DeviceMemoryResource | None = None,
) -> Column: ...
Loading