Skip to content

Commit f2289a8

Browse files
committed
Improve parsing of link relationships
* Parse the rel attribute in accordance with the WHATWG spec: https://infra.spec.whatwg.org/#split-on-ascii-whitespace
* Only list unique rel values in the rel-urls output, fixes #159: microformats/microformats2-parsing#30
* Sort the unique rel values alphabetically: microformats/microformats2-parsing#29
* Correctly merge attribute values into the resulting object.
1 parent ebffee4 commit f2289a8

File tree

1 file changed

+24
-11
lines changed

1 file changed

+24
-11
lines changed

Mf2/Parser.php

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1250,16 +1250,15 @@ public function parseRelsAndAlternates() {
12501250

12511251
// Iterate through all a, area and link elements with rel attributes
12521252
foreach ($this->xpath->query('//a[@rel and @href] | //link[@rel and @href] | //area[@rel and @href]') as $hyperlink) {
1253-
if ($hyperlink->getAttribute('rel') == '') {
1253+
// Parse the set of rels for the current link
1254+
$linkRels = array_unique(array_filter(preg_split('/[\t\n\f\r ]/', $hyperlink->getAttribute('rel'))));
1255+
if (count($linkRels) === 0) {
12541256
continue;
12551257
}
12561258

12571259
// Resolve the href
12581260
$href = $this->resolveUrl($hyperlink->getAttribute('href'));
12591261

1260-
// Split up the rel into space-separated values
1261-
$linkRels = array_filter(explode(' ', $hyperlink->getAttribute('rel')));
1262-
12631262
$rel_attributes = array();
12641263

12651264
if ($hyperlink->hasAttribute('media')) {
@@ -1299,13 +1298,27 @@ public function parseRelsAndAlternates() {
12991298
$rels[$rel][] = $href;
13001299
}
13011300

1302-
if (!in_array($href, $rel_urls)) {
1303-
$rel_urls[$href] = array_merge(
1304-
$rel_attributes,
1305-
array('rels' => $linkRels)
1306-
);
1301+
if (!array_key_exists($href, $rel_urls)) {
1302+
$rel_urls[$href] = array('rels' => array());
13071303
}
13081304

1305+
// Add the attributes collected only if they were not already set
1306+
$rel_urls[$href] = array_merge(
1307+
$rel_attributes,
1308+
$rel_urls[$href]
1309+
);
1310+
1311+
// Merge current rels with those already set
1312+
$rel_urls[$href]['rels'] = array_merge(
1313+
$rel_urls[$href]['rels'],
1314+
$linkRels
1315+
);
1316+
}
1317+
1318+
// Alphabetically sort the rels arrays after removing duplicates
1319+
foreach ($rel_urls as $href => $object) {
1320+
$rel_urls[$href]['rels'] = array_unique($rel_urls[$href]['rels']);
1321+
sort($rel_urls[$href]['rels']);
13091322
}
13101323

13111324
if (empty($rels) and $this->jsonMode) {
@@ -1314,8 +1327,8 @@ public function parseRelsAndAlternates() {
13141327

13151328
if (empty($rel_urls) and $this->jsonMode) {
13161329
$rel_urls = new stdClass();
1317-
}
1318-
1330+
}
1331+
13191332
return array($rels, $rel_urls, $alternates);
13201333
}
13211334

0 commit comments

Comments
 (0)