Skip to content

Commit 63dcdef

Browse files
authored
Merge pull request #15 from clue-labs/benchmark
Add decoding benchmark plus benchmark for GZIP-compressed CSV files
2 parents 95abcc2 + 9e36683 commit 63dcdef

File tree

3 files changed

+101
-2
lines changed

3 files changed

+101
-2
lines changed

composer.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
"react/stream": "^1.0 || ^0.7 || ^0.6"
1919
},
2020
"require-dev": {
21-
"react/event-loop": "^1.0 || ^0.5 || ^0.4 || ^0.3",
22-
"phpunit/phpunit": "^7.0 || ^6.0 || ^5.7 || ^4.8.35"
21+
"phpunit/phpunit": "^7.0 || ^6.0 || ^5.7 || ^4.8.35",
22+
"react/child-process": "^0.6",
23+
"react/event-loop": "^1.0 || ^0.5 || ^0.4 || ^0.3"
2324
}
2425
}

examples/91-benchmark-count.php

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
<?php
2+
3+
// simple usage:
4+
// $ php examples/91-benchmark-count.php < examples/users.csv
5+
//
6+
// getting reasonable results requires a large data set:
7+
// 1) download a large CSV data set, for example from https://github.com/fivethirtyeight/russian-troll-tweets
8+
// $ curl -OL https://github.com/fivethirtyeight/russian-troll-tweets/raw/master/IRAhandle_tweets_1.csv
9+
//
10+
// 2) pipe CSV into benchmark script:
11+
// $ php examples/91-benchmark-count.php < IRAhandle_tweets_1.csv
12+
13+
use Clue\React\Csv\AssocDecoder;
14+
use React\EventLoop\Factory;
15+
use React\Stream\ReadableResourceStream;
16+
17+
require __DIR__ . '/../vendor/autoload.php';
18+
19+
if (extension_loaded('xdebug')) {
20+
echo 'NOTICE: The "xdebug" extension is loaded, this has a major impact on performance.' . PHP_EOL;
21+
}
22+
23+
$loop = Factory::create();
24+
$decoder = new AssocDecoder(new ReadableResourceStream(STDIN, $loop));
25+
26+
$count = 0;
27+
$decoder->on('data', function () use (&$count) {
28+
++$count;
29+
});
30+
31+
$start = microtime(true);
32+
$report = $loop->addPeriodicTimer(0.05, function () use (&$count, $start) {
33+
printf("\r%d records in %0.3fs...", $count, microtime(true) - $start);
34+
});
35+
36+
$decoder->on('close', function () use (&$count, $report, $loop, $start) {
37+
$now = microtime(true);
38+
$loop->cancelTimer($report);
39+
40+
printf("\r%d records in %0.3fs => %d records/s\n", $count, $now - $start, $count / ($now - $start));
41+
});
42+
43+
$loop->run();
examples/92-benchmark-count-gzip.php

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
<?php
2+
3+
// getting reasonable results requires a large data set:
4+
// 1) download a large CSV data set, for example from https://github.com/fivethirtyeight/russian-troll-tweets
5+
// $ curl -OL https://github.com/fivethirtyeight/russian-troll-tweets/raw/master/IRAhandle_tweets_1.csv
6+
//
7+
// 2) If your data set is not already in gzip format, compress it:
8+
// $ gzip < IRAhandle_tweets_1.csv > IRAhandle_tweets_1.csv.gz
9+
//
10+
// 3) pipe compressed CSV into benchmark script:
11+
// $ php examples/92-benchmark-count-gzip.php < IRAhandle_tweets_1.csv.gz
12+
13+
use Clue\React\Csv\AssocDecoder;
14+
use React\ChildProcess\Process;
15+
use React\EventLoop\Factory;
16+
17+
require __DIR__ . '/../vendor/autoload.php';
18+
19+
if (extension_loaded('xdebug')) {
20+
echo 'NOTICE: The "xdebug" extension is loaded, this has a major impact on performance.' . PHP_EOL;
21+
}
22+
23+
$loop = Factory::create();
24+
25+
// This benchmark example spawns the decompressor in a child `gunzip` process
26+
// because parsing CSV files is already mostly CPU-bound and multi-processing
27+
// is preferred here. If the input source is slower (such as an HTTP download)
28+
// or if `gunzip` is not available (Windows), using a built-in decompressor
29+
// such as https://github.com/clue/reactphp-zlib would be preferable.
30+
$process = new Process('exec gunzip', null, null, array(
31+
0 => STDIN,
32+
1 => array('pipe', 'w'),
33+
STDERR
34+
));
35+
$process->start($loop);
36+
$decoder = new AssocDecoder($process->stdout);
37+
38+
$count = 0;
39+
$decoder->on('data', function () use (&$count) {
40+
++$count;
41+
});
42+
43+
$start = microtime(true);
44+
$report = $loop->addPeriodicTimer(0.05, function () use (&$count, $start) {
45+
printf("\r%d records in %0.3fs...", $count, microtime(true) - $start);
46+
});
47+
48+
$decoder->on('close', function () use (&$count, $report, $loop, $start) {
49+
$now = microtime(true);
50+
$loop->cancelTimer($report);
51+
52+
printf("\r%d records in %0.3fs => %d records/s\n", $count, $now - $start, $count / ($now - $start));
53+
});
54+
55+
$loop->run();

0 commit comments

Comments
 (0)