Skip to content

Commit fafe1ef

Browse files
ChristophWurstbackportbot[bot]
authored and committed
fix: try to avoid memory exhaustion
Signed-off-by: Christoph Wurst <christoph@winzerhof-wurst.at>
1 parent 628aed8 commit fafe1ef

File tree

2 files changed

+23
-4
lines changed

2 files changed

+23
-4
lines changed

lib/Db/LoginAddressAggregatedMapper.php

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,9 @@ private function findHistoricIpv4(int $threshold, int $maxAge): array {
6767
$qb->expr()->like('ip', $qb->createNamedParameter('_%._%._%._%')),
6868
$qb->expr()->gte('last_seen', $qb->createNamedParameter($maxAge)),
6969
$qb->expr()->lte('first_seen', $qb->createNamedParameter($threshold))
70-
));
70+
))
71+
->orderBy('last_seen', 'DESC') // Use most recent data in case of limiting
72+
->setMaxResults(15_000); // More data will likely exhaust memory
7173

7274
return $this->findEntities($query);
7375
}
@@ -81,7 +83,9 @@ private function findRecentIpV4(int $threshold): array {
8183
->where($qb->expr()->andX(
8284
$qb->expr()->like('ip', $qb->createNamedParameter('_%._%._%._%')),
8385
$qb->expr()->gt('last_seen', $qb->createNamedParameter($threshold))
84-
));
86+
))
87+
->orderBy('last_seen', 'DESC') // Use most recent data in case of limiting
88+
->setMaxResults(3_000); // More data will likely exhaust memory
8589

8690
return $this->findEntities($query);
8791
}
@@ -145,7 +149,9 @@ private function findHistoricIpv6(int $threshold, int $maxAge): array {
145149
$qb->expr()->notLike('ip', $qb->createNamedParameter('_%._%._%._%')),
146150
$qb->expr()->gte('last_seen', $qb->createNamedParameter($maxAge)),
147151
$qb->expr()->lte('first_seen', $qb->createNamedParameter($threshold))
148-
));
152+
))
153+
->orderBy('last_seen', 'DESC') // Use most recent data in case of limiting
154+
->setMaxResults(15_000); // More data will likely exhaust memory
149155

150156
return $this->findEntities($query);
151157
}
@@ -159,7 +165,9 @@ private function findRecentIpV6(int $threshold): array {
159165
->where($qb->expr()->andX(
160166
$qb->expr()->notLike('ip', $qb->createNamedParameter('_%._%._%._%')),
161167
$qb->expr()->gt('last_seen', $qb->createNamedParameter($threshold))
162-
));
168+
))
169+
->orderBy('last_seen', 'DESC') // Use most recent data in case of limiting
170+
->setMaxResults(3_000); // More data will likely exhaust memory
163171

164172
return $this->findEntities($query);
165173
}

lib/Service/DataLoader.php

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,8 +22,11 @@
2222
use function floor;
2323
use function log;
2424
use function max;
25+
use function random_int;
2526

2627
class DataLoader {
28+
private const MAX_SAMPLES_POSITIVES = 15_000;
29+
private const MAX_SAMPLES_VALIDATE_POSITIVES = 3_000;
2730

2831
/** @var LoginAddressAggregatedMapper */
2932
private $loginAddressMapper;
@@ -65,6 +68,14 @@ public function loadTrainingAndValidationData(TrainingDataConfig $dataConfig,
6568

6669
$positives = $this->addressesToDataSet($historyRaw, $strategy);
6770
$validationPositives = $this->addressesToDataSet($recentRaw, $strategy);
71+
if ($positives->count() > self::MAX_SAMPLES_POSITIVES) {
72+
$threshold = (self::MAX_SAMPLES_POSITIVES / $positives->count()) * 100;
73+
$positives = $positives->filter(fn () => random_int(0, 100) <= $threshold);
74+
}
75+
if ($validationPositives->count() > self::MAX_SAMPLES_VALIDATE_POSITIVES) {
76+
$threshold = (self::MAX_SAMPLES_VALIDATE_POSITIVES / $validationPositives->count()) * 100;
77+
$validationPositives = $validationPositives->filter(fn () => random_int(0, 100) <= $threshold);
78+
}
6879

6980
return new CollectedData(
7081
$positives,

0 commit comments

Comments
 (0)