Skip to content

Commit af0d94e

Browse files
committed
Extract GeoLite db files in a memory-efficient way
1 parent c4b85da commit af0d94e

File tree

9 files changed

+118
-52
lines changed

9 files changed

+118
-52
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com), and this
1010

1111
### Changed
1212
* Update to PHPUnit 12
13+
* [#71](https://github.com/shlinkio/shlink-ip-geolocation/issues/71) Decompress GeoLite2 db files in a memory-efficient way.
1314

1415
### Deprecated
1516
* *Nothing*

composer.json

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,21 @@
1313
],
1414
"require": {
1515
"php": "^8.3",
16+
"ext-zlib": "*",
1617
"fig/http-message-util": "^1.1",
17-
"geoip2/geoip2": "^3.1",
18-
"guzzlehttp/guzzle": "^7.9",
18+
"geoip2/geoip2": "^3.2",
19+
"guzzlehttp/guzzle": "^7.10",
1920
"shlinkio/shlink-config": "^4.0",
20-
"symfony/filesystem": "^7.2"
21+
"symfony/filesystem": "^7.3"
2122
},
2223
"require-dev": {
2324
"devster/ubench": "^2.1",
2425
"phpstan/phpstan": "^2.1",
2526
"phpstan/phpstan-phpunit": "^2.0",
26-
"phpunit/phpunit": "^12.0",
27+
"phpunit/phpunit": "^12.3",
2728
"roave/security-advisories": "dev-master",
2829
"shlinkio/php-coding-standard": "~2.4.0",
29-
"symfony/var-dumper": "^7.2"
30+
"symfony/var-dumper": "^7.3"
3031
},
3132
"autoload": {
3233
"psr-4": {

src/Exception/DbUpdateException.php

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,27 +12,22 @@ class DbUpdateException extends RuntimeException
1212
{
1313
public static function forFailedDownload(Throwable $prev): self
1414
{
15-
return self::build('An error occurred while trying to download a fresh copy of the GeoLite2 database', $prev);
15+
return new self(
16+
'An error occurred while trying to download a fresh copy of the GeoLite2 database',
17+
previous: $prev,
18+
);
1619
}
1720

18-
public static function forFailedExtraction(string $compressedFile, Throwable $prev): self
21+
public static function forFailedExtraction(): self
1922
{
20-
return self::build(
21-
sprintf('An error occurred while trying to extract the GeoLite2 database from %s', $compressedFile),
22-
$prev,
23-
);
23+
return new self('It was not possible to extract GeoLite2 database');
2424
}
2525

2626
public static function forFailedCopyToDestination(string $destination, Throwable $prev): self
2727
{
28-
return self::build(
28+
return new self(
2929
sprintf('An error occurred while trying to copy GeoLite2 db file to %s folder', $destination),
30-
$prev,
30+
previous: $prev,
3131
);
3232
}
33-
34-
private static function build(string $message, Throwable $prev): self
35-
{
36-
return new self($message, 0, $prev);
37-
}
3833
}

src/GeoLite2/DbUpdater.php

Lines changed: 87 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,35 @@
77
use Fig\Http\Message\RequestMethodInterface as RequestMethod;
88
use GuzzleHttp\ClientInterface;
99
use GuzzleHttp\RequestOptions;
10-
use PharData;
1110
use Shlinkio\Shlink\IpGeolocation\Exception\DbUpdateException;
1211
use Shlinkio\Shlink\IpGeolocation\Exception\MissingLicenseException;
1312
use Symfony\Component\Filesystem\Exception as FilesystemException;
1413
use Symfony\Component\Filesystem\Filesystem;
1514
use Throwable;
1615

16+
use function fclose;
17+
use function feof;
18+
use function fopen;
19+
use function fread;
20+
use function fseek;
21+
use function fwrite;
22+
use function gzclose;
23+
use function gzopen;
24+
use function gzread;
25+
use function min;
26+
use function octdec;
1727
use function sprintf;
28+
use function str_ends_with;
29+
use function strlen;
30+
use function substr;
31+
use function trim;
32+
33+
use const SEEK_CUR;
1834

1935
class DbUpdater implements DbUpdaterInterface
2036
{
2137
private const DB_COMPRESSED_FILE = 'GeoLite2-City.tar.gz';
38+
private const DB_TEMP_TAR_FILE = 'GeoLite2-City.tmp.tar';
2239
private const DB_DECOMPRESSED_FILE = 'GeoLite2-City.mmdb';
2340

2441
public function __construct(
@@ -64,25 +81,86 @@ private function downloadDbFile(string $dest, callable|null $handleProgress = nu
6481
}
6582
}
6683

84+
/**
85+
* Extract the GeoLite2 db file from the provided tar.gz archive into the temp dir, and return the path to the extracted file
86+
*/
6787
private function extractDbFile(string $compressedFile, string $tempDir): string
6888
{
69-
try {
70-
$phar = new PharData($compressedFile);
71-
$internalPathToDb = sprintf('%s/%s', $phar->getBasename(), self::DB_DECOMPRESSED_FILE);
72-
$phar->extractTo($tempDir, $internalPathToDb, true);
89+
// Decompress the gz archive into a temporary tar file
90+
$tarPath = $tempDir . '/' . self::DB_TEMP_TAR_FILE;
91+
$gz = @gzopen($compressedFile, 'rb');
92+
if (! $gz) {
93+
throw DbUpdateException::forFailedExtraction();
94+
}
7395

74-
return sprintf('%s/%s', $tempDir, $internalPathToDb);
75-
} catch (Throwable $e) {
76-
throw DbUpdateException::forFailedExtraction($compressedFile, $e);
96+
$tar = @fopen($tarPath, 'wb');
97+
if (! $tar) {
98+
gzclose($gz);
99+
throw DbUpdateException::forFailedExtraction();
100+
}
101+
while ($chunk = gzread($gz, 4096)) {
102+
fwrite($tar, $chunk);
77103
}
104+
gzclose($gz);
105+
fclose($tar);
106+
107+
// Process tar file sequentially, looking for the db file
108+
$tar = fopen($tarPath, 'rb');
109+
if (! $tar) {
110+
throw DbUpdateException::forFailedExtraction();
111+
}
112+
113+
$outputPath = $tempDir . '/' . self::DB_DECOMPRESSED_FILE;
114+
while (! feof($tar)) {
115+
$header = fread($tar, 512);
116+
if (! $header || strlen($header) < 512) {
117+
break;
118+
}
119+
120+
$filename = trim(substr($header, offset: 0, length: 100));
121+
if ($filename === '') {
122+
break;
123+
}
124+
125+
$size = octdec(trim(substr($header, offset: 124, length: 12)));
126+
127+
// Once we find the file, read it sequentially and return
128+
if (str_ends_with($filename, self::DB_DECOMPRESSED_FILE) && $out = fopen($outputPath, 'wb')) {
129+
$remaining = $size;
130+
while ($remaining > 0) {
131+
/** @var positive-int $readLen */
132+
$readLen = min(4096, $remaining);
133+
$data = fread($tar, $readLen);
134+
if (! $data) {
135+
break;
136+
}
137+
fwrite($out, $data);
138+
$remaining -= strlen($data);
139+
}
140+
fclose($out);
141+
fclose($tar);
142+
$this->deleteTempFiles([$tarPath]);
143+
144+
return $outputPath;
145+
}
146+
147+
// Skip this file
148+
$skip = $size + (512 - ($size % 512)) % 512;
149+
fseek($tar, offset: (int) $skip, whence: SEEK_CUR);
150+
}
151+
152+
fclose($tar);
153+
$this->deleteTempFiles([$tarPath]);
154+
155+
throw DbUpdateException::forFailedExtraction();
78156
}
79157

80158
private function copyNewDbFile(string $from): void
81159
{
82160
$destination = $this->options->dbLocation;
83161

84162
try {
85-
$this->filesystem->copy($from, $destination, true);
163+
$this->filesystem->copy($from, $destination, overwriteNewerFiles: true);
86164
$this->filesystem->chmod([$destination], 0666);
87165
} catch (FilesystemException\FileNotFoundException | FilesystemException\IOException $e) {
88166
throw DbUpdateException::forFailedCopyToDestination($destination, $e);

test-resources/.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
geolite2-testing-db
1+
GeoLite2-City.tmp.tar

test-resources/GeoLite2-City.mmdb

-4.56 MB
Binary file not shown.
28.5 MB
Binary file not shown.

test/Exception/DbUpdateExceptionTest.php

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,20 +32,15 @@ public function forFailedDownloadReturnsExpectedException(): void
3232
$e->getMessage(),
3333
);
3434
self::assertEquals($this->prev, $e->getPrevious());
35-
self::assertEquals(0, $e->getCode());
3635
}
3736

3837
#[Test]
3938
public function forFailedExtractionReturnsExpectedException(): void
4039
{
41-
$e = DbUpdateException::forFailedExtraction($this->theFile, $this->prev);
40+
$e = DbUpdateException::forFailedExtraction();
4241

43-
self::assertEquals(
44-
sprintf('An error occurred while trying to extract the GeoLite2 database from %s', $this->theFile),
45-
$e->getMessage(),
46-
);
47-
self::assertEquals($this->prev, $e->getPrevious());
48-
self::assertEquals(0, $e->getCode());
42+
self::assertEquals('It was not possible to extract GeoLite2 database', $e->getMessage());
43+
self::assertNull($e->getPrevious());
4944
}
5045

5146
#[Test]
@@ -58,6 +53,5 @@ public function forFailedCopyToDestinationReturnsExpectedException(): void
5853
$e->getMessage(),
5954
);
6055
self::assertEquals($this->prev, $e->getPrevious());
61-
self::assertEquals(0, $e->getCode());
6256
}
6357
}

test/GeoLite2/DbUpdaterTest.php

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
use PHPUnit\Framework\MockObject\Rule\InvokedCount;
1515
use PHPUnit\Framework\TestCase;
1616
use Psr\Http\Message\ResponseInterface;
17+
use Shlinkio\Shlink\IpGeolocation\Exception\DbUpdateException;
1718
use Shlinkio\Shlink\IpGeolocation\Exception\MissingLicenseException;
18-
use Shlinkio\Shlink\IpGeolocation\Exception\RuntimeException;
1919
use Shlinkio\Shlink\IpGeolocation\GeoLite2\DbUpdater;
2020
use Shlinkio\Shlink\IpGeolocation\GeoLite2\GeoLite2Options;
2121
use Symfony\Component\Filesystem\Exception as FilesystemException;
@@ -44,8 +44,7 @@ public function anExceptionIsThrownIfFreshDbCannotBeDownloaded(): void
4444
->withAnyParameters()
4545
->willThrowException(new ClientException('', new Request('GET', ''), $this->response));
4646

47-
$this->expectException(RuntimeException::class);
48-
$this->expectExceptionCode(0);
47+
$this->expectException(DbUpdateException::class);
4948
$this->expectExceptionMessage(
5049
'An error occurred while trying to download a fresh copy of the GeoLite2 database',
5150
);
@@ -58,11 +57,8 @@ public function anExceptionIsThrownIfFreshDbCannotBeExtracted(): void
5857
{
5958
$this->setUpHttpClient();
6059

61-
$this->expectException(RuntimeException::class);
62-
$this->expectExceptionCode(0);
63-
$this->expectExceptionMessage(
64-
'An error occurred while trying to extract the GeoLite2 database from __invalid__/GeoLite2-City.tar.gz',
65-
);
60+
$this->expectException(DbUpdateException::class);
61+
$this->expectExceptionMessage('It was not possible to extract GeoLite2 database');
6662

6763
$this->dbUpdater('__invalid__')->downloadFreshCopy();
6864
}
@@ -73,8 +69,7 @@ public function anExceptionIsThrownIfFreshDbCannotBeCopiedToDestination(callable
7369
$this->setUpHttpClient();
7470
$prepareFs($this->filesystem);
7571

76-
$this->expectException(RuntimeException::class);
77-
$this->expectExceptionCode(0);
72+
$this->expectException(DbUpdateException::class);
7873
$this->expectExceptionMessage('An error occurred while trying to copy GeoLite2 db file to db_location folder');
7974

8075
$this->dbUpdater()->downloadFreshCopy();
@@ -105,7 +100,7 @@ public function noExceptionsAreThrownIfEverythingWorksFine(): void
105100
$this->setUpHttpClient();
106101
$this->filesystem->expects($this->once())->method('copy')->withAnyParameters();
107102
$this->filesystem->expects($this->once())->method('chmod')->withAnyParameters();
108-
$this->filesystem->expects($this->once())->method('remove')->withAnyParameters();
103+
$this->filesystem->expects($this->exactly(2))->method('remove')->withAnyParameters();
109104

110105
$this->dbUpdater()->downloadFreshCopy();
111106
}
@@ -135,7 +130,7 @@ public function anExceptionIsThrownIfNoLicenseKeyIsProvided(string|null $license
135130
$this->expectException(MissingLicenseException::class);
136131
$this->expectExceptionMessage('Impossible to download GeoLite2 db file. A license key was not provided.');
137132

138-
$this->dbUpdater(null, $license)->downloadFreshCopy();
133+
$this->dbUpdater(licenseKey: $license)->downloadFreshCopy();
139134
}
140135

141136
public static function provideInvalidLicenses(): iterable
@@ -144,12 +139,14 @@ public static function provideInvalidLicenses(): iterable
144139
yield 'empty license' => [''];
145140
}
146141

147-
private function dbUpdater(string|null $tempDir = null, string|null $licenseKey = 'foobar'): DbUpdater
148-
{
142+
private function dbUpdater(
143+
string $tempDir = __DIR__ . '/../../test-resources',
144+
string|null $licenseKey = 'foobar',
145+
): DbUpdater {
149146
$options = new GeoLite2Options(
150147
licenseKey: $licenseKey,
151148
dbLocation: 'db_location',
152-
tempDir: $tempDir ?? __DIR__ . '/../../test-resources',
149+
tempDir: $tempDir,
153150
);
154151
return new DbUpdater($this->httpClient, $this->filesystem, $options);
155152
}

0 commit comments

Comments
 (0)