Skip to content

Commit e829f5e

Browse files
committed
Add decoding benchmark for GZIP-compressed CSV files
1 parent 7c3465f commit e829f5e

File tree

2 files changed

+54
-2
lines changed

2 files changed

+54
-2
lines changed

composer.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
"react/stream": "^1.0 || ^0.7 || ^0.6"
1919
},
2020
"require-dev": {
21-
"react/event-loop": "^1.0 || ^0.5 || ^0.4 || ^0.3",
22-
"phpunit/phpunit": "^7.0 || ^6.0 || ^5.7 || ^4.8.35"
21+
"clue/zlib-react": "^1.0 || ^0.2.2",
22+
"phpunit/phpunit": "^7.0 || ^6.0 || ^5.7 || ^4.8.35",
23+
"react/event-loop": "^1.0 || ^0.5 || ^0.4 || ^0.3"
2324
}
2425
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
<?php
2+
3+
// getting reasonable results requires a large data set:
4+
// 1) download a large CSV data set, for example from https://github.com/fivethirtyeight/russian-troll-tweets
5+
// $ curl -OL https://github.com/fivethirtyeight/russian-troll-tweets/raw/master/IRAhandle_tweets_1.csv
6+
//
7+
// 2) If your data set is not already in gzip format, compress it:
8+
// $ gzip < IRAhandle_tweets_1.csv > IRAhandle_tweets_1.csv.gz
9+
//
10+
// 3) pipe compressed CSV into benchmark script:
11+
// $ php examples/92-benchmark-count-gzip.php < IRAhandle_tweets_1.csv.gz
12+
13+
use Clue\React\Csv\AssocDecoder;
14+
use Clue\React\Zlib\Decompressor;
15+
use React\EventLoop\Factory;
16+
use React\Stream\ReadableResourceStream;
17+
18+
require __DIR__ . '/../vendor/autoload.php';
19+
20+
if (extension_loaded('xdebug')) {
21+
echo 'NOTICE: The "xdebug" extension is loaded, this has a major impact on performance.' . PHP_EOL;
22+
}
23+
24+
$loop = Factory::create();
25+
$input = new ReadableResourceStream(STDIN, $loop);
26+
$decompressor = new Decompressor(ZLIB_ENCODING_GZIP);
27+
$input->pipe($decompressor);
28+
$decoder = new AssocDecoder($decompressor);
29+
30+
// Report errors emitted by the decompressor stream on their own output line.
// The exception message is passed as a printf() argument instead of being
// concatenated into the format string, so a literal "%" in the message is
// printed verbatim rather than being parsed as a conversion specification.
$decompressor->on('error', function (Exception $e) {
    printf("\nDecompression error: %s\n", $e->getMessage());
});
33+
34+
$count = 0;
35+
$decoder->on('data', function () use (&$count) {
36+
++$count;
37+
});
38+
39+
$start = microtime(true);
40+
$report = $loop->addPeriodicTimer(0.05, function () use (&$count, $start) {
41+
printf("\r%d records in %0.3fs...", $count, microtime(true) - $start);
42+
});
43+
44+
$decoder->on('close', function () use (&$count, $report, $loop, $start) {
45+
$now = microtime(true);
46+
$loop->cancelTimer($report);
47+
48+
printf("\r%d records in %0.3fs => %d records/s\n", $count, $now - $start, $count / ($now - $start));
49+
});
50+
51+
$loop->run();

0 commit comments

Comments
 (0)