Skip to content

Commit f06531c

Browse files
committed
Fix headless browser bug: Chrome child processes kept accumulating because page resources were not being freed effectively
1 parent eb10a37 commit f06531c

File tree

6 files changed

+87
-29
lines changed

6 files changed

+87
-29
lines changed

Examples/start.php

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -391,18 +391,18 @@ function startAppDownloader()
391391
};
392392

393393
//使用无头浏览器回调或者直接使用无头浏览器相关API
394-
$downloader->onHeadlessBrowserOpenPage = function($downloader, $browser, $page, $url){
394+
/*$downloader->onHeadlessBrowserOpenPage = function($downloader, $browser, $page, $url){
395395
//注意:灵活设计特定类型的返回值有助于对付各种复杂的应用场景
396396
//1. 返回false, 会触发中断后续的业务逻辑;
397397
//2. 返回string,会触发中断后续的业务逻辑,一般多用于返回页面的HTML;
398398
//3. 返回array, 会继续执行后续的业务逻辑,一般多用于返回无头浏览器选项参数;
399399
//4. 返回其他, 会继续执行后续的业务逻辑,相当于是什么也没有发生;
400400
401401
//注意:一般无需调用如下几行代码,因为爬山虎内部默认会自动调用无头API做同样的工作.
402-
//$page->navigate($url)->waitForNavigation('firstMeaningfulPaint');
403-
//$html = $page->getHtml();
404-
//return $html;
405-
};
402+
$page->navigate($url)->waitForNavigation('firstMeaningfulPaint');
403+
$html = $page->getHtml();
404+
return $html;
405+
};*/
406406
}
407407

408408

src/Downloader.php

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -841,30 +841,30 @@ public function useHeadlessBrowser($args = [])
841841
$options['track_request_args'] = !empty($args['track_request_args']) ? true : false;
842842
$merged_options = $this->headlessBrowser->getMergedOptions($options);
843843

844+
//maybe issue request by user callback
844845
try{
845-
$this->headlessBrowser->getBrowserFactoryInstance()->setOptions($merged_options);
846-
$browser = $this->headlessBrowser->getBrowserInstance();
847-
$page = $this->headlessBrowser->getPage();
846+
if(property_exists($this, 'onHeadlessBrowserOpenPage')){
847+
$this->headlessBrowser->getBrowserFactoryInstance()->setOptions($merged_options);
848+
$browser = $this->headlessBrowser->getBrowserInstance();
849+
$page = $this->headlessBrowser->getPage();
850+
$returning = $this->triggerUserCallback('onHeadlessBrowserOpenPage', $this, $browser, $page, $args['url']);
851+
if(false === $returning){
852+
$page->close();
853+
return Tool::throwback('-353', $this->langConfig['downloader_download_task_no'] . '(NaN)', $extra);
854+
}elseif(is_string($returning)){
855+
$page->close();
856+
$extra = ['content' => $returning];
857+
return Tool::throwback('0', $this->langConfig['downloader_download_task_yes'], $extra);
858+
}else{
859+
$page->close();
860+
is_array($returning) && $merged_options = array_merge($merged_options, $returning);
861+
}
862+
}
848863
}catch(\Throwable $e){
849864
$msg = $e->getMessage();
850865
return Tool::throwback('-352', $this->langConfig['headless_browser_exception'] . " $msg ", $extra);
851866
}
852867

853-
$returning = $this->triggerUserCallback('onHeadlessBrowserOpenPage', $this, $browser, $page, $args['url']);
854-
855-
//maybe issue request by user callback
856-
if(false === $returning){
857-
$page->close();
858-
return Tool::throwback('-353', $this->langConfig['downloader_download_task_no'] . '(NaN)', $extra);
859-
}elseif(is_string($returning)){
860-
$page->close();
861-
$extra = ['content' => $returning];
862-
return Tool::throwback('0', $this->langConfig['downloader_download_task_yes'], $extra);
863-
}else{
864-
$page->close();
865-
is_array($returning) && $merged_options = array_merge($merged_options, $returning);
866-
}
867-
868868
//maybe issue request by API directly
869869
try{
870870
$content = $this->headlessBrowser->request($args['method'], $args['url'], $merged_options);

src/Kernel/Library/Helper/Tool.php

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1383,6 +1383,31 @@ public static function executeShell($command = '')
13831383
}
13841384
}
13851385

1386+
/**
 * @brief convert a raw HTTP header string into an associative array
 *
 * The input is split on "\n" and every line is split on its FIRST ":" only,
 * so values that themselves contain colons (URLs, host:port pairs, times)
 * are preserved in full. A line is skipped when it has no name, no ":" or
 * an empty value part, or when its name part contains one of the HTTP method
 * tokens CONNECT/GET/POST/PUT/DELETE. Names and values are trimmed, which
 * also removes a trailing "\r". When a name repeats, the last one wins.
 *
 * @param  string  $header  raw header text, one "Name: value" pair per line
 *
 * @return array   map of header name => header value; [] for non-string input
 */
public static function convertRawHeaderToArray($header = '')
{
    if(!is_string($header)) return [];

    $_header = [];
    foreach(explode("\n", $header) as $line)
    {
        //limit to 2 parts: only the first colon separates the name from the value
        $tmp = explode(":", $line, 2);

        //compare the value against '' explicitly: empty() would also drop a valid "0"
        if(empty($tmp[0]) || !isset($tmp[1]) || '' === $tmp[1]) continue;
        if(preg_match("/(CONNECT|GET|POST|PUT|DELETE)/s", $tmp[0])) continue;

        $_header[trim($tmp[0])] = trim($tmp[1]);
    }

    return $_header;
}
13861411
}
13871412

13881413

src/Kernel/Middleware/HeadlessBrowser/Chrome.php

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,19 @@ public function getBrowserInstance()
173173
return self::$_browser;
174174
}
175175

176+
/**
 * @brief close the held browser instance and drop the reference to it
 *
 * Does nothing when self::$_browser is empty or is not an object.
 *
 * @return void
 */
public function destroy()
{
    $browser = self::$_browser;

    if(!empty($browser) && is_object($browser))
    {
        $browser->close();
        self::$_browser = null;
    }
}
188+
176189
/**
177190
* @brief get default arguments
178191
*
@@ -251,9 +264,15 @@ public function request($method, $url, $args = [])
251264

252265
//issue http request
253266
$page = self::getPage();
254-
$page->navigate($url)->waitForNavigation($page_event, $navigate_timeout);
255-
$html = $page->getHtml();
256-
$page->close();
267+
268+
try{
269+
$page->navigate($url)->waitForNavigation($page_event, $navigate_timeout);
270+
$html = $page->getHtml();
271+
$page->close();
272+
}catch(\Throwable $e){
273+
$page->close();
274+
throw new \Exception($e->getMessage());
275+
}
257276

258277
return $html;
259278
}
@@ -290,12 +309,19 @@ public function setOptions($options = [])
290309
/**
291310
* @brief set http header
292311
*
293-
* @param array $headers
312+
* @param array|string $headers
294313
*
295314
* @return object
296315
*/
297316
public function setHeaders($headers = [])
298317
{
318+
if(!is_string($headers) && !is_array($headers)) return $this;
319+
320+
if(is_string($headers))
321+
{
322+
$headers = Tool::convertRawHeaderToArray($headers);
323+
}
324+
299325
!empty($headers) && self::$_config['headers'] = $headers;
300326

301327
return $this;

src/Kernel/Middleware/HttpClient/Guzzle.php

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,12 +358,19 @@ public function setOptions($options = [])
358358
/**
359359
* @brief set http header
360360
*
361-
* @param array $headers
361+
* @param array|string $headers
362362
*
363363
* @return object
364364
*/
365365
public function setHeaders($headers = [])
366366
{
367+
if(!is_string($headers) && !is_array($headers)) return $this;
368+
369+
if(is_string($headers))
370+
{
371+
$headers = Tool::convertRawHeaderToArray($headers);
372+
}
373+
367374
!empty($headers) && self::$_config['headers'] = $headers;
368375

369376
return $this;

src/Kernel/PHPCreeper.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class PHPCreeper extends Worker
4040
*
4141
* @var string
4242
*/
43-
public const CURRENT_VERSION = '1.9.6';
43+
public const CURRENT_VERSION = '1.9.7';
4444

4545
/**
4646
* engine name

0 commit comments

Comments
 (0)