Skip to content

Commit 9dbb991

Browse files
committed
Added method to parse language from HTML as 'html-lang' key + tests for #96.
1 parent 0ccc493 commit 9dbb991

File tree

2 files changed

+131
-1
lines changed

2 files changed

+131
-1
lines changed

Mf2/Parser.php

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,35 @@ public function innerText($el, $implied=false) {
451451
return ($out === '') ? NULL : $out;
452452
}
453453

454+
/**
 * Determine the language that applies to an element.
 *
 * The element itself is inspected first, then each ancestor element in
 * turn. The first `lang` attribute found wins. If the `<html>` element is
 * reached without finding one, the document's
 * `<meta http-equiv="Content-Language">` value is used as a fallback.
 *
 * @param DOMElement $el Element whose language should be resolved
 * @access public
 * @return string The trimmed language value, or an empty string when no
 *                language information is found
 */
public function language(DOMElement $el)
{
    $node = $el;

    // Walk up the ancestor chain. The instanceof guard stops the walk when
    // the parent is the DOMDocument itself or null (detached node, or a tree
    // whose root is not <html>) instead of passing a non-element onwards.
    while ($node instanceof \DOMElement) {
        // element has a lang attribute; use it
        if ($node->hasAttribute('lang')) {
            return trim($node->getAttribute('lang'));
        }

        if ($node->tagName === 'html') {
            // we're at the <html> element and no lang; check <meta> http-equiv Content-Language.
            // The XPath predicate already guarantees the http-equiv attribute exists.
            foreach ($this->xpath->query('.//meta[@http-equiv]') as $meta) {
                if ($meta->hasAttribute('content') && strtolower($meta->getAttribute('http-equiv')) === 'content-language') {
                    return trim($meta->getAttribute('content'));
                }
            }

            // Nothing above <html> can carry a language; stop here.
            return '';
        }

        // no language on this element; check the parent node
        $node = $node->parentNode;
    }

    return '';
} # end method language()
482+
454483
// TODO: figure out if this has problems with sms: and geo: URLs
455484
public function resolveUrl($url) {
456485
// If the URL is seriously malformed it’s probably beyond the scope of this
@@ -741,7 +770,8 @@ public function parseE(\DOMElement $e) {
741770

742771
return array(
743772
'html' => $html,
744-
'value' => unicodeTrim($this->innerText($e))
773+
'value' => unicodeTrim($this->innerText($e)),
774+
'html-lang' => $this->language($e)
745775
);
746776
}
747777

@@ -1000,6 +1030,9 @@ public function parseH(\DOMElement $e) {
10001030
$return['url'][] = $this->resolveUrl($url);
10011031
}
10021032

1033+
// Language
1034+
$return['html-lang'] = $this->language($e);
1035+
10031036
// Make sure things are in alphabetical order
10041037
sort($mfTypes);
10051038

tests/Mf2/ParseLanguageTest.php

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
<?php
2+
3+
/**
4+
* Tests of the language parsing methods within mf2\Parser
5+
*/
6+
7+
namespace Mf2\Parser\Test;
8+
9+
use Mf2\Parser;
10+
use Mf2;
11+
use PHPUnit_Framework_TestCase;
12+
13+
/**
 * Exercises the 'html-lang' value that the parser attaches to parsed items,
 * covering lang attributes on <html>, on the microformat root itself, on an
 * enclosing h-feed, and the <meta http-equiv="Content-Language"> fallback.
 */
class ParseLanguageTest extends PHPUnit_Framework_TestCase {

    public function setUp() {
        date_default_timezone_set('Europe/London');
    }

    /**
     * Test with only <html lang>
     */
    public function testHtmlLangOnly()
    {
        $input = '<html lang="en"> <div class="h-entry">This test is in English.</div> </html>';
        $parser = new Parser($input);
        $result = $parser->parse();

        $this->assertSame('en', $result['items'][0]['properties']['html-lang']);
    } # end method testHtmlLangOnly()

    /**
     * Test with only h-entry lang
     */
    public function testHEntryLangOnly()
    {
        $input = '<html> <div class="h-entry" lang="en">This test is in English.</div> </html>';
        $parser = new Parser($input);
        $result = $parser->parse();

        $this->assertSame('en', $result['items'][0]['properties']['html-lang']);
    } # end method testHEntryLangOnly()

    /**
     * Test with different <html lang> and h-entry lang; the h-entry's own
     * lang attribute takes precedence
     */
    public function testHtmlAndHEntryLang()
    {
        $input = '<html lang="en"> <div class="h-entry" lang="es">Esta prueba está en español.</div> </html>';
        $parser = new Parser($input);
        $result = $parser->parse();

        $this->assertSame('es', $result['items'][0]['properties']['html-lang']);
    } # end method testHtmlAndHEntryLang()

    /**
     * Test with different <html lang>, h-entry lang, and h-entry without lang,
     * which should inherit from the <html lang>
     */
    public function testMultiLanguageInheritance()
    {
        $input = '<html lang="en"> <div class="h-entry">This test is in English.</div> <div class="h-entry" lang="es">Esta prueba está en español.</div> </html>';
        $parser = new Parser($input);
        $result = $parser->parse();

        $this->assertSame('en', $result['items'][0]['properties']['html-lang']);
        $this->assertSame('es', $result['items'][1]['properties']['html-lang']);
    } # end method testMultiLanguageInheritance()

    /**
     * Test feed with .h-feed lang which contains multiple h-entries of different languages
     * (or none specified), which should inherit from the .h-feed lang.
     */
    public function testMultiLanguageFeed()
    {
        // The h-feed <div> is explicitly closed so the fixture is well-formed
        // and the test does not depend on the HTML parser's error recovery.
        $input = '<html> <div class="h-feed" lang="en"> <h1 class="p-name">Test Feed</h1> <div class="h-entry">This test is in English.</div> <div class="h-entry" lang="es">Esta prueba está en español.</div> <div class="h-entry" lang="fr">Ce test est en français.</div> </div> </html>';
        $parser = new Parser($input);
        $result = $parser->parse();

        $this->assertSame('en', $result['items'][0]['properties']['html-lang']);
        $this->assertSame('en', $result['items'][0]['children'][0]['properties']['html-lang']);
        $this->assertSame('es', $result['items'][0]['children'][1]['properties']['html-lang']);
        $this->assertSame('fr', $result['items'][0]['children'][2]['properties']['html-lang']);
    } # end method testMultiLanguageFeed()

    /**
     * Test with language specified in <meta> http-equiv Content-Language
     */
    public function testMetaContentLanguage()
    {
        $input = '<html> <meta http-equiv="Content-Language" content="es"/> <div class="h-entry">Esta prueba está en español.</div> </html>';
        $parser = new Parser($input);
        $result = $parser->parse();

        $this->assertSame('es', $result['items'][0]['properties']['html-lang']);
    } # end method testMetaContentLanguage()

}

0 commit comments

Comments
 (0)