-
Notifications
You must be signed in to change notification settings - Fork 183
Add NearestNeighbors SPMD API #2557
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
7213c80
7aea5a6
fa48719
3765c6c
b3c66af
ced4aca
72fc707
ca9408b
e8c1ed9
eca8bff
47a7d93
a56ee49
544cca3
8532fe3
b9eb2df
ae7ade0
19cde34
ffba570
0b7777e
5faaa8e
466e195
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,7 @@ | |
_assert_unordered_allclose, | ||
_generate_classification_data, | ||
_generate_regression_data, | ||
_generate_statistic_data, | ||
_get_local_tensor, | ||
_mpi_libs_and_gpu_available, | ||
_spmd_assert_allclose, | ||
|
@@ -308,3 +309,83 @@ def test_knnreg_spmd_synthetic( | |
spmd_dists, batch_dists, localize=True, rtol=tol, atol=tol | ||
) | ||
_spmd_assert_allclose(spmd_result, batch_result, rtol=tol, atol=tol) | ||
|
||
|
||
@pytest.mark.skipif( | ||
not _mpi_libs_and_gpu_available, | ||
reason="GPU device and MPI libs required for test", | ||
) | ||
@pytest.mark.parametrize( | ||
"dataframe,queue", | ||
get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), | ||
) | ||
@pytest.mark.mpi | ||
def test_knnsearch_spmd_gold(dataframe, queue): | ||
# Import spmd and batch algo | ||
from sklearnex.neighbors import NearestNeighbors as NearestNeighbors_Batch | ||
from sklearnex.spmd.neighbors import NearestNeighbors as NearestNeighbors_SPMD | ||
|
||
# Create gold data and convert to dataframe | ||
X_train = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) | ||
ethanglaser marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
local_dpt_X_train = _convert_to_dataframe( | ||
_get_local_tensor(X_train), sycl_queue=queue, target_df=dataframe | ||
) | ||
|
||
# Ensure predictions of batch algo match spmd | ||
spmd_model = NearestNeighbors_SPMD(n_neighbors=2, algorithm="brute").fit( | ||
local_dpt_X_train | ||
) | ||
batch_model = NearestNeighbors_Batch(n_neighbors=2, algorithm="brute").fit(X_train) | ||
spmd_dists, spmd_indcs = spmd_model.kneighbors(local_dpt_X_train) | ||
batch_dists, batch_indcs = batch_model.kneighbors(X_train) | ||
|
||
_assert_unordered_allclose(spmd_indcs, batch_indcs, localize=True) | ||
_assert_unordered_allclose(spmd_dists, batch_dists, localize=True) | ||
|
||
|
||
@pytest.mark.skipif( | ||
not _mpi_libs_and_gpu_available, | ||
reason="GPU device and MPI libs required for test", | ||
) | ||
@pytest.mark.parametrize( | ||
"dimensions", [{"n": 100, "m": 10, "k": 2}, {"n": 100000, "m": 100, "k": 100}] | ||
ethanglaser marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) | ||
@pytest.mark.parametrize( | ||
"dataframe,queue", | ||
get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), | ||
) | ||
@pytest.mark.parametrize("dtype", [np.float32, np.float64]) | ||
@pytest.mark.mpi | ||
def test_knnsearch_spmd_synthetic( | ||
dimensions, | ||
dataframe, | ||
queue, | ||
dtype, | ||
): | ||
if dimensions["n"] > 10000 and dtype == np.float32: | ||
pytest.skip("Skipping large float32 test due to expected precision issues") | ||
|
||
# Import spmd and batch algo | ||
from sklearnex.neighbors import NearestNeighbors as NearestNeighbors_Batch | ||
from sklearnex.spmd.neighbors import NearestNeighbors as NearestNeighbors_SPMD | ||
|
||
# Generate data and convert to dataframe | ||
X_train = _generate_statistic_data(dimensions["n"], dimensions["m"], dtype=dtype) | ||
|
||
local_dpt_X_train = _convert_to_dataframe( | ||
_get_local_tensor(X_train), sycl_queue=queue, target_df=dataframe | ||
) | ||
|
||
# Ensure search results of batch algo match spmd | ||
spmd_model = NearestNeighbors_SPMD( | ||
n_neighbors=dimensions["k"], algorithm="brute" | ||
).fit(local_dpt_X_train) | ||
batch_model = NearestNeighbors_Batch( | ||
n_neighbors=dimensions["k"], algorithm="brute" | ||
).fit(X_train) | ||
spmd_dists, spmd_indcs = spmd_model.kneighbors(local_dpt_X_train) | ||
batch_dists, batch_indcs = batch_model.kneighbors(X_train) | ||
|
||
tol = 0.005 if dtype == np.float32 else 1e-6 | ||
Review comment: Yikes on this float32 setting. Any info on it? Especially because there is a skip associated with it above (meaning an even worse value occurs?). Reply: It's true, and a good observation. It's pretty tricky because the `assert_allclose`-style check fails if even a single element is outside the threshold, which is why the tolerance is so loose; it would be nice if that behavior could be customized. It's possible that we could still run the indices check for the skipped case, but distances are more fragile. Follow-up: This is not the only place in the SPMD test scope where drastically loose tolerances are needed for the float32 tests to pass, though. |
||
_assert_unordered_allclose(spmd_indcs, batch_indcs, localize=True) | ||
_assert_unordered_allclose(spmd_dists, batch_dists, localize=True, rtol=tol, atol=tol) |
Uh oh!
There was an error while loading. Please reload this page.