Skip to content

Commit 8207f66

Browse files
committed
readability support
1 parent 780fb50 commit 8207f66

File tree

5 files changed

+69
-12
lines changed

5 files changed

+69
-12
lines changed

src/NlpClient.php

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,26 @@ public function newspaperUrl( $url )
7070
return ( !empty($data['newspaper']) ) ? $data['newspaper'] : null;
7171
}
7272

73+
74+
/**
75+
* Readability Article Extraction from URL
76+
*/
77+
public function readabilityUrl( $url )
{
    // Ask the '/readability' endpoint to process the page at $url (GET call, 'url' parameter).
    $response = $this->get_call('/readability', ['url' => $url ] );

    // No response, or a response without a usable 'data' entry, is reported as null.
    if ( empty($response['data']) ) {
        return null;
    }

    // Hand back only the 'data' portion of the response.
    return $response['data'];
}
83+
84+
/**
85+
* Readability Article Extraction from HTML
86+
*/
87+
public function readabilityHTML( $html )
{
    // Send the raw markup to the '/readability' endpoint (POST call, 'html' parameter).
    $response = $this->post_call('/readability', ['html' => $html ] );

    // No response, or a response without a usable 'data' entry, is reported as null.
    if ( empty($response['data']) ) {
        return null;
    }

    // Hand back only the 'data' portion of the response.
    return $response['data'];
}
7393

7494
/**
7595
* Get neighbouring words

tests/Unit/CoreNlpTest.php

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ class CoreNlpTest extends TestCase
1010
public function test_core_nlp()
1111
{
1212
$corenlp = new \Web64\Nlp\CoreNlp();
13-
echo PHP_EOL. PHP_EOL;
13+
//echo PHP_EOL. PHP_EOL;
1414
$text = "Catalonia: Ex-police chief Trapero charged with sedition. The former chief of Catalonia's police force, Josep Lluis Trapero, has been charged over events linked with last year's independence referendum.";
1515

1616
$text = "German Foreign Ministry investigating arrest of German reporter in Turkey.

tests/Unit/NewspaperTest.php

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -6,26 +6,26 @@
66

77
class NewspaperTest extends TestCase
88
{
9-
// /** @test */
10-
// public function url_article_extraction()
11-
// {
12-
// $nlp = new \Web64\Nlp\NlpClient( $this->nlpserver_config['hosts'], $this->nlpserver_config['debug'] );
9+
/** @test */
10+
public function url_article_extraction()
11+
{
12+
$nlp = new \Web64\Nlp\NlpClient( $this->nlpserver_config['hosts'], $this->nlpserver_config['debug'] );
1313

14-
// $newspaper = $nlp->newspaperUrl('http://www.bbc.com/news/science-environment-43710766');
14+
$newspaper = $nlp->newspaperUrl('https://github.com/web64/nlpserver');
1515

16-
// $this->msg( $newspaper );
17-
// $this->assertNotEmpty($newspaper);
18-
// }
16+
$this->msg( $newspaper );
17+
$this->assertNotEmpty($newspaper);
18+
}
1919

2020
/** @test */
2121
public function html_article_extraction()
2222
{
2323
$nlp = new \Web64\Nlp\NlpClient( $this->nlpserver_config['hosts'], $this->nlpserver_config['debug'] );
2424

25-
$html = file_get_contents( 'http://www.bbc.com/news/science-environment-43710766' );
25+
$html = file_get_contents( 'https://github.com/web64/nlpserver' );
2626
$newspaper = $nlp->newspaperHtml( $html );
2727

28-
//$this->msg( $newspaper );
28+
$this->msg( $newspaper );
2929

3030
$this->assertNotEmpty($newspaper);
3131
$this->assertNotEmpty($newspaper['title']);

tests/Unit/PolyglotTest.php

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ public function entity_extraction()
1818

1919
$polyglot = $nlp->polyglot( $text, 'en' );
2020

21-
//$this->msg( $polyglot );
21+
$this->msg( $polyglot );
2222

2323
$this->assertNotEmpty( $polyglot->data );
2424
$this->assertArrayHasKey('sentiment', $polyglot->data, "Missing sentiment");

tests/Unit/ReadabilityTest.php

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,37 @@
1+
<?php
2+
3+
namespace Tests\Unit;
4+
5+
use Tests\TestCase;
6+
7+
/**
 * Exercises the two Readability extraction entry points of NlpClient:
 * extraction from a URL and extraction from already-downloaded HTML.
 *
 * Both tests talk to the hosts listed in the test-case configuration and
 * fetch a live web page, so they need network access to pass.
 */
class ReadabilityTest extends TestCase
{
    /** Page used as the extraction target by both tests. */
    const ARTICLE_URL = 'https://github.com/web64/nlpserver';

    /**
     * Extraction by URL must produce a non-empty result.
     *
     * @test
     */
    public function readability_url_article_extraction()
    {
        $nlp = $this->makeClient();

        $article = $nlp->readabilityUrl( self::ARTICLE_URL );

        $this->assertNotEmpty($article);
    }

    /**
     * Extraction from raw HTML must produce a result carrying a title,
     * short title, content and summary.
     *
     * @test
     */
    public function readability_html_article_extraction()
    {
        $nlp = $this->makeClient();

        $html = file_get_contents( self::ARTICLE_URL );

        // file_get_contents() returns false on failure. Stop here with a clear
        // message instead of posting `false` to the server and failing later
        // on an unrelated-looking assertion.
        $this->assertNotFalse( $html, 'Could not download ' . self::ARTICLE_URL );

        // Spelled exactly as the method is declared in NlpClient.
        $article = $nlp->readabilityHTML( $html );

        $this->assertNotEmpty($article);
        $this->assertNotEmpty($article['title']);
        $this->assertNotEmpty($article['short_title']);
        $this->assertNotEmpty($article['content']);
        $this->assertNotEmpty($article['summary']);
    }

    /**
     * Builds an NlpClient from the 'hosts' and 'debug' entries of the
     * test-case configuration.
     */
    private function makeClient()
    {
        return new \Web64\Nlp\NlpClient( $this->nlpserver_config['hosts'], $this->nlpserver_config['debug'] );
    }
}

0 commit comments

Comments
 (0)