Skip to content

Commit d661b44

Browse files
Merge pull request #969 from nextcloud/fix/unique-datasets-only-stable30
[stable30] fix(NegativeSampleGenerator): Prevent duplicate user IDs when getting…
2 parents d5fc497 + 5f82b77 commit d661b44

File tree

2 files changed

+36
-1
lines changed

2 files changed

+36
-1
lines changed

lib/Service/NegativeSampleGenerator.php

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222
use function str_split;
2323

2424
class NegativeSampleGenerator {
25+
/**
26+
* Get IP vectors exclusively used by one user.
27+
* Includes the user vector in the second dimension of the returned array.
28+
*/
2529
private function getUniqueIPsPerUser(Dataset $positives): array {
2630
$map = [];
2731

@@ -35,7 +39,7 @@ private function getUniqueIPsPerUser(Dataset $positives): array {
3539
$map[$ipVecStr] = [
3640
$uidVecStr,
3741
];
38-
} else {
42+
} elseif (!in_array($uidVecStr, $map[$ipVecStr])) {
3943
$map[$ipVecStr][] = $uidVecStr;
4044
}
4145
}

tests/Unit/Service/NegativeSampleGeneratorTest.php

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,37 @@ public function testGenerateMultipleShuffledFromLimitedUnique(): void {
128128
self::assertCount(5, $result);
129129
}
130130

131+
/**
132+
* A dataset may consist only of entries whose IP is unique to one user. If not handled correctly,
132+
* this results in an array without any IP. This test verifies the
133+
* correct handling. See GitHub issue #860 for more details.
135+
* @return void
136+
*/
137+
public function testGenerateMultipleShuffledFromUniquesOnly(): void {
138+
$positives = new Unlabeled([
139+
array_merge(self::decToBitArray(1, 16), self::decToBitArray(1, 32)),
140+
array_merge(self::decToBitArray(1, 16), self::decToBitArray(1, 32)),
141+
array_merge(self::decToBitArray(1, 16), self::decToBitArray(1, 32)),
142+
143+
array_merge(self::decToBitArray(2, 16), self::decToBitArray(2, 32)),
144+
array_merge(self::decToBitArray(2, 16), self::decToBitArray(2, 32)),
145+
array_merge(self::decToBitArray(2, 16), self::decToBitArray(2, 32)),
146+
]);
147+
148+
$result = $this->generator->generateShuffledFromPositiveSamples($positives, 2);
149+
150+
self::assertCount(2, $result);
151+
foreach ($result as $sample) {
152+
$ipVec = array_slice($sample, 16, 32);
153+
154+
self::assertTrue(
155+
$ipVec === self::decToBitArray(1, 32) ||
156+
$ipVec === self::decToBitArray(2, 32),
157+
'Sample has an unique IP'
158+
);
159+
}
160+
}
161+
131162
/**
132163
* @return int[]
133164
*/

0 commit comments

Comments
 (0)