Skip to content

Commit 951d216

Browse files
Merge pull request #972 from nextcloud/backport/969/stable29
[stable29] [stable30] fix(NegativeSampleGenerator): Prevent duplicate user IDs when getting…
2 parents 0519527 + 3acf476 commit 951d216

File tree

2 files changed

+36
-1
lines changed

2 files changed

+36
-1
lines changed

lib/Service/NegativeSampleGenerator.php

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@
3838
use function str_split;
3939

4040
class NegativeSampleGenerator {
41+
/**
42+
* Get IP vectors exclusively used by one user.
43+
* Includes the user vector in the second dimension of the returned array.
44+
*/
4145
private function getUniqueIPsPerUser(Dataset $positives): array {
4246
$map = [];
4347

@@ -51,7 +55,7 @@ private function getUniqueIPsPerUser(Dataset $positives): array {
5155
$map[$ipVecStr] = [
5256
$uidVecStr,
5357
];
54-
} else {
58+
} elseif (!in_array($uidVecStr, $map[$ipVecStr])) {
5559
$map[$ipVecStr][] = $uidVecStr;
5660
}
5761
}

tests/Unit/Service/NegativeSampleGeneratorTest.php

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,37 @@ public function testGenerateMultipleShuffledFromLimitedUnique(): void {
144144
self::assertCount(5, $result);
145145
}
146146

147+
/**
148+
* A Dataset can consist solely of entries whose IP is used by only one user. If not handled correctly,
149+
* this will result in an array without any IP. This tests the
150+
* correct handling. See GitHub issue #860 for more.
151+
* @return void
152+
*/
153+
public function testGenerateMultipleShuffledFromUniquesOnly(): void {
154+
$positives = new Unlabeled([
155+
array_merge(self::decToBitArray(1, 16), self::decToBitArray(1, 32)),
156+
array_merge(self::decToBitArray(1, 16), self::decToBitArray(1, 32)),
157+
array_merge(self::decToBitArray(1, 16), self::decToBitArray(1, 32)),
158+
159+
array_merge(self::decToBitArray(2, 16), self::decToBitArray(2, 32)),
160+
array_merge(self::decToBitArray(2, 16), self::decToBitArray(2, 32)),
161+
array_merge(self::decToBitArray(2, 16), self::decToBitArray(2, 32)),
162+
]);
163+
164+
$result = $this->generator->generateShuffledFromPositiveSamples($positives, 2);
165+
166+
self::assertCount(2, $result);
167+
foreach ($result as $sample) {
168+
$ipVec = array_slice($sample, 16, 32);
169+
170+
self::assertTrue(
171+
$ipVec === self::decToBitArray(1, 32) ||
172+
$ipVec === self::decToBitArray(2, 32),
173+
'Sample has an unique IP'
174+
);
175+
}
176+
}
177+
147178
/**
148179
* @return int[]
149180
*/

0 commit comments

Comments
 (0)