Skip to content

Commit d384f95

Browse files
committed
security improvement and add new callback API onDownloaderConnectToParser for downloader worker
1 parent 9eb7007 commit d384f95

File tree

5 files changed

+28
-7
lines changed

5 files changed

+28
-7
lines changed

Docs/README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,10 @@ return array(
108108
//set the max number of the task queue, 0 indicates no limit (optional, default `0`)
109109
'max_number' => 1000,
110110

111+
//specifies the max number of connections each downloader process can establish to the parser
112+
//(optional, default `1`, minimum value 1, maximum value 1000)
113+
//'max_connections' => 1,
114+
111115
//set the max number of requests for each socket connection,
112116
//if the cumulative number of socket requests exceeds the max number of requests,
113117
//the parser will close the connection and the client will try to reconnect automatically.

Examples/start.php

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,9 +177,12 @@
177177
//任务队列最大task数量, 0代表无限制 (可选项,默认0)
178178
//'max_number' => 1000,
179179

180+
//特指每个下载器进程可以建立到解析器的最大连接数 (可选项,默认1,最小值为1,最大值为1000)
181+
//'max_connections' => 1,
182+
180183
//当前Socket连接累计最大请求数,0代表无限制 (可选项,默认0)
181184
//如果当前Socket连接的累计请求数超过最大请求数时,
182-
//parser端会主动关闭连接,同时客户端会自动尝试重连
185+
//parser端会主动关闭连接,同时客户端会自动尝试重连.
183186
//'max_request' => 1000,
184187

185188
//限定爬取站点域,留空表示不受限
@@ -355,6 +358,10 @@ function startAppDownloader()
355358
$downloader->onDownloaderStart = function($downloader){
356359
};
357360

361+
$downloader->onDownloaderConnectToParser = function($connection){
362+
//$connection->bufferFull = true;
363+
};
364+
358365
//回调【onBeforeDownload】的新增别名是【onDownloadBefore】
359366
$downloader->onDownloadBefore = function($downloader, $task){
360367
//disable http ssl verify in any of the following two ways
@@ -377,7 +384,9 @@ function startAppDownloader()
377384
//pprint($error, $task);
378385
};
379386

380-
$downloader->onTaskEmpty = function($downloader){
387+
//回调【onTaskEmpty】的新增别名是【onDownloadTaskEmpty】
388+
$downloader->onDownloadTaskEmpty= function($downloader){
389+
//$downloader->removeTimer();
381390
};
382391

383392
//使用无头浏览器回调或者直接使用无头浏览器相关API

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ $config['redis'] = [
153153
$config['task'] = array(
154154
//'crawl_interval' => 1,
155155
//'max_number' => 1000,
156+
//'max_connections' => 1,
156157
//'max_request' => 1000,
157158
'context' => [
158159
'cache_enabled' => true,
@@ -285,10 +286,11 @@ function startAppDownloader()
285286
//$downloader->onDownloaderStart = function($downloader){};
286287
//$downloader->onDownloaderStop = function($downloader){};
287288
//$downloader->onDownloaderMessage = function($downloader, $parser_reply){};
289+
//$downloader->onDownloaderConnectToParser = function($connection){};
288290
//$downloader->onDownloadStart = function($downloader, $task){};
289291
//$downloader->onDownloadAfter = function($downloader, $download_data, $task){};
290292
//$downloader->onDownloadFail = function($downloader, $error, $task){};
291-
//$downloader->onTaskEmpty = function($downloader){};
293+
//$downloader->onDownloadTaskEmpty = function($downloader){};
292294
}
293295

294296
function startAppParser()

src/Downloader.php

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ public function onWorkerStart($worker)
123123
}
124124

125125
//try to connect to parser asynchronously then execute consumeOneTask by interval
126-
$this->connectToParser();
126+
$this->_connectToParser();
127127
}
128128

129129
/**
@@ -158,7 +158,7 @@ public function onWorkerReload($worker)
158158
*
159159
* @return void
160160
*/
161-
public function connectToParser()
161+
private function _connectToParser()
162162
{
163163
//get all task connections
164164
$task_connections = $this->getAsyncTaskConnection();
@@ -230,6 +230,10 @@ public function onConnectToParser($connection)
230230
'downloader_client_address' => 'tcp://' . $connection->getLocalAddress(),
231231
]));
232232

233+
//trigger callback
234+
$returning = $this->triggerUserCallback('onDownloaderConnectToParser', $connection);
235+
if(false === $returning) return false;
236+
233237
//install task timer
234238
$connection->taskTimerId = Timer::add($this->getTaskCrawlInterval(), [$this, 'consumeOneTask'], [$connection->channel], 1);
235239

src/Kernel/PHPCreeper.php

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class PHPCreeper extends Worker
4040
*
4141
* @var string
4242
*/
43-
public const CURRENT_VERSION = '1.9.3';
43+
public const CURRENT_VERSION = '1.9.4';
4444

4545
/**
4646
* engine name
@@ -324,6 +324,8 @@ class PHPCreeper extends Worker
324324
'onDownloaderStop' => null,
325325
'onDownloaderReload' => null,
326326
'onDownloaderMessage' => null,
327+
'onDownloaderConnectToParser' => null,
328+
'onTaskEmpty' => null,
327329
'onBeforeDownload' => null,
328330
'onStartDownload' => null,
329331
'onAfterDownload' => null,
@@ -344,7 +346,6 @@ class PHPCreeper extends Worker
344346
'onServerBufferFull' => null,
345347
'onServerBufferDrain' => null,
346348
'onServerError' => null,
347-
'onTaskEmpty' => null,
348349
'onHeadlessBrowserOpenPage' => null,
349350
);
350351

@@ -364,6 +365,7 @@ class PHPCreeper extends Worker
364365
'onStartDownload' => ['onDownloadStart'],
365366
'onAfterDownload' => ['onDownloadAfter'],
366367
'onFailDownload' => ['onDownloadFail'],
368+
'onTaskEmpty' => ['onDownloadTaskEmpty'],
367369
);
368370

369371
/**

0 commit comments

Comments
 (0)