|
| 1 | +<?php |
| 2 | + |
| 3 | +// getting reasonable results requires a large data set: |
| 4 | +// 1) download a large CSV data set, for example from https://github.com/fivethirtyeight/russian-troll-tweets |
| 5 | +// $ curl -OL https://github.com/fivethirtyeight/russian-troll-tweets/raw/master/IRAhandle_tweets_1.csv |
| 6 | +// |
| 7 | +// 2) If your data set is not already in gzip format, compress it: |
| 8 | +// $ gzip < IRAhandle_tweets_1.csv > IRAhandle_tweets_1.csv.gz |
| 9 | +// |
| 10 | +// 3) pipe compressed CSV into benchmark script: |
| 11 | +// $ php examples/92-benchmark-count-gzip.php < IRAhandle_tweets_1.csv.gz |
| 12 | + |
| 13 | +use Clue\React\Csv\AssocDecoder; |
| 14 | +use React\ChildProcess\Process; |
| 15 | +use React\EventLoop\Factory; |
| 16 | + |
| 17 | +require __DIR__ . '/../vendor/autoload.php'; |
| 18 | + |
// The notice below warns that a loaded "xdebug" extension has a major impact
// on performance, i.e. the reported throughput will not be representative.
if (extension_loaded('xdebug')) {
    echo 'NOTICE: The "xdebug" extension is loaded, this has a major impact on performance.', PHP_EOL;
}

$loop = Factory::create();

// Decompression is delegated to a child `gunzip` process: parsing CSV is
// already mostly CPU-bound, so running the decompressor in a separate process
// is preferred here. If the input source is slower (such as an HTTP download)
// or if `gunzip` is not available (Windows), a built-in decompressor such as
// https://github.com/clue/reactphp-zlib would be preferable.
//
// File descriptors handed to the child:
//   0 - this script's STDIN (the compressed CSV piped into the benchmark)
//   1 - a pipe the child writes to, consumed below via $gunzip->stdout
//   2 - this script's STDERR
$descriptors = array(
    0 => STDIN,
    1 => array('pipe', 'w'),
    2 => STDERR
);
$gunzip = new Process('exec gunzip', null, null, $descriptors);
$gunzip->start($loop);

// Decode the child's output stream as CSV.
$csv = new AssocDecoder($gunzip->stdout);

// Tally every "data" event emitted by the decoder. The counter is captured by
// reference so the reporting callbacks below always see the current value.
$records = 0;
$csv->on('data', function () use (&$records) {
    $records++;
});

// Redraw a single progress line every 50ms; the leading "\r" moves the cursor
// back to the start of the line so each update overwrites the previous one.
$startedAt = microtime(true);
$progressTimer = $loop->addPeriodicTimer(0.05, function () use (&$records, $startedAt) {
    $elapsed = microtime(true) - $startedAt;
    printf("\r%d records in %0.3fs...", $records, $elapsed);
});

// When the decoder closes, cancel the progress timer and print the final
// tally together with the average throughput.
$csv->on('close', function () use (&$records, $progressTimer, $loop, $startedAt) {
    $elapsed = microtime(true) - $startedAt;
    $loop->cancelTimer($progressTimer);

    printf("\r%d records in %0.3fs => %d records/s\n", $records, $elapsed, $records / $elapsed);
});

$loop->run();
0 commit comments