From 2167fcfff871b6d9068daca6399c8f2ad44f3fe2 Mon Sep 17 00:00:00 2001 From: Chris Kucharczyk Date: Fri, 23 Dec 2022 16:51:55 -0600 Subject: [PATCH 1/3] reindex candidate users included in test set to prevent duplicate inclusion --- implicit/evaluation.pyx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/implicit/evaluation.pyx b/implicit/evaluation.pyx index f25d0da..4f19c41 100644 --- a/implicit/evaluation.pyx +++ b/implicit/evaluation.pyx @@ -205,8 +205,13 @@ cpdef leave_k_out_split( candidate_items = items[full_candidate_mask] candidate_data = data[full_candidate_mask] + # reindex candidate_user indices so they are properly formatted for the + # calculations in _take_tails + xsorted = np.argsort(unique_candidate_users) + reindexed_candidate_users = np.searchsorted(unique_candidate_users[xsorted], candidate_users) + test_idx, train_idx = _take_tails( - candidate_users, K, shuffled=True, return_complement=True + reindexed_candidate_users, K, shuffled=True, return_complement=True ) # get all remaining remaining candidate user-item pairs, and prepare to append to From c054a367c3324de02e6603b5beb74dea33566412 Mon Sep 17 00:00:00 2001 From: Chris Kucharczyk Date: Fri, 23 Dec 2022 16:52:04 -0600 Subject: [PATCH 2/3] add static matrix tests --- tests/evaluation_test.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tests/evaluation_test.py b/tests/evaluation_test.py index df02261..5d37b68 100644 --- a/tests/evaluation_test.py +++ b/tests/evaluation_test.py @@ -15,9 +15,20 @@ def _get_sample_matrix(): def _get_matrix(): - mat = random(100, 100, density=0.5, format="csr", dtype=np.float32) + mat = random(100, 100, density=0.1, format="csr", dtype=np.float32) return mat.tocoo() +def _get_fixed_matrix(): + mat = csr_matrix([ + [1, 0, 0, 0], + [3, 2, 1, 0], + [1, 0, 0, 0], + [0, 1, 0, 0], + [0, 0, 1, 0], + [0, 1, 1, 1], + [0, 0, 1, 0], + ]) + return mat.tocoo() def test_train_test_split(): seed = np.random.randint(1000) @@ -33,7 +44,7 @@ def test_leave_k_out_returns_correct_shape(): """ mat = _get_matrix() - train, test = leave_k_out_split(mat, K=1) + train, test = leave_k_out_split(mat, K=1) assert train.shape == mat.shape assert test.shape == mat.shape @@ -48,6 +59,10 @@ def test_leave_k_out_outputs_produce_input(): train, test = leave_k_out_split(mat, K=1) assert ((train + test) - mat).nnz == 0 + mat = _get_fixed_matrix() + train, test = leave_k_out_split(mat, K=1) + assert ((train + test) - mat).nnz == 0 + def test_leave_k_split_is_reservable(): """ From 3e29b83d884f708b3aedb946f3ec1805423fe048 Mon Sep 17 00:00:00 2001 From: Chris Kucharczyk Date: Fri, 23 Dec 2022 16:54:01 -0600 Subject: [PATCH 3/3] format --- tests/evaluation_test.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/tests/evaluation_test.py b/tests/evaluation_test.py index 5d37b68..8460150 100644 --- a/tests/evaluation_test.py +++ b/tests/evaluation_test.py @@ -18,18 +18,22 @@ def _get_matrix(): mat = random(100, 100, density=0.1, format="csr", dtype=np.float32) return mat.tocoo() + def _get_fixed_matrix(): - mat = csr_matrix([ - [1, 0, 0, 0], - [3, 2, 1, 0], - [1, 0, 0, 0], - [0, 1, 0, 0], - [0, 0, 1, 0], - [0, 1, 1, 1], - [0, 0, 1, 0], - ]) + mat = csr_matrix( + [ + [1, 0, 0, 0], + [3, 2, 1, 0], + [1, 0, 0, 0], + [0, 1, 0, 0], + [0, 0, 1, 0], + [0, 1, 1, 1], + [0, 0, 1, 0], + ] + ) return mat.tocoo() + def test_train_test_split(): seed = np.random.randint(1000) mat = _get_sample_matrix() @@ -44,7 +48,7 @@ def test_leave_k_out_returns_correct_shape(): """ mat = _get_matrix() - train, test = leave_k_out_split(mat, K=1) + train, test = leave_k_out_split(mat, K=1) assert train.shape == mat.shape assert test.shape == mat.shape