@@ -130,7 +130,7 @@ function unicodeTrim($str) {
130130function mfNamesFromClass ($ class , $ prefix ='h- ' ) {
131131 $ class = str_replace (array (' ' , ' ' , "\n" ), ' ' , $ class );
132132 $ classes = explode (' ' , $ class );
133- $ classes = preg_grep ('#^[a-z\-]+ $# ' , $ classes );
133+ $ classes = preg_grep ('#^(h|p|u|dt|e)-( [a-z0-9]+-)?[a-z]+(-[a-z]+)* $# ' , $ classes );
134134 $ matches = array ();
135135
136136 foreach ($ classes as $ classname ) {
@@ -1257,16 +1257,15 @@ public function parseRelsAndAlternates() {
12571257
12581258 // Iterate through all a, area and link elements with rel attributes
12591259 foreach ($ this ->xpath ->query ('//a[@rel and @href] | //link[@rel and @href] | //area[@rel and @href] ' ) as $ hyperlink ) {
1260- if ($ hyperlink ->getAttribute ('rel ' ) == '' ) {
1260+ // Parse the set of rels for the current link
1261+ $ linkRels = array_unique (array_filter (preg_split ('/[\t\n\f\r ]/ ' , $ hyperlink ->getAttribute ('rel ' ))));
1262+ if (count ($ linkRels ) === 0 ) {
12611263 continue ;
12621264 }
12631265
12641266 // Resolve the href
12651267 $ href = $ this ->resolveUrl ($ hyperlink ->getAttribute ('href ' ));
12661268
1267- // Split up the rel into space-separated values
1268- $ linkRels = array_filter (explode (' ' , $ hyperlink ->getAttribute ('rel ' )));
1269-
12701269 $ rel_attributes = array ();
12711270
12721271 if ($ hyperlink ->hasAttribute ('media ' )) {
@@ -1285,8 +1284,8 @@ public function parseRelsAndAlternates() {
12851284 $ rel_attributes ['type ' ] = $ hyperlink ->getAttribute ('type ' );
12861285 }
12871286
1288- if ($ hyperlink ->nodeValue ) {
1289- $ rel_attributes ['text ' ] = $ hyperlink ->nodeValue ;
1287+ if (strlen ( $ hyperlink ->textContent ) > 0 ) {
1288+ $ rel_attributes ['text ' ] = $ hyperlink ->textContent ;
12901289 }
12911290
12921291 if ($ this ->enableAlternates ) {
@@ -1303,16 +1302,34 @@ public function parseRelsAndAlternates() {
13031302 }
13041303
13051304 foreach ($ linkRels as $ rel ) {
1306- $ rels [$ rel ][] = $ href ;
1305+ if (!array_key_exists ($ rel , $ rels )) {
1306+ $ rels [$ rel ] = array ($ href );
1307+ } elseif (!in_array ($ href , $ rels [$ rel ])) {
1308+ $ rels [$ rel ][] = $ href ;
1309+ }
13071310 }
13081311
1309- if (!in_array ($ href , $ rel_urls )) {
1310- $ rel_urls [$ href ] = array_merge (
1311- $ rel_attributes ,
1312- array ('rels ' => $ linkRels )
1313- );
1312+ if (!array_key_exists ($ href , $ rel_urls )) {
1313+ $ rel_urls [$ href ] = array ('rels ' => array ());
13141314 }
13151315
1316+ // Add the attributes collected only if they were not already set
1317+ $ rel_urls [$ href ] = array_merge (
1318+ $ rel_attributes ,
1319+ $ rel_urls [$ href ]
1320+ );
1321+
1322+ // Merge current rels with those already set
1323+ $ rel_urls [$ href ]['rels ' ] = array_merge (
1324+ $ rel_urls [$ href ]['rels ' ],
1325+ $ linkRels
1326+ );
1327+ }
1328+
1329+ // Alphabetically sort the rels arrays after removing duplicates
1330+ foreach ($ rel_urls as $ href => $ object ) {
1331+ $ rel_urls [$ href ]['rels ' ] = array_unique ($ rel_urls [$ href ]['rels ' ]);
1332+ sort ($ rel_urls [$ href ]['rels ' ]);
13161333 }
13171334
13181335 if (empty ($ rels ) and $ this ->jsonMode ) {
@@ -1321,8 +1338,8 @@ public function parseRelsAndAlternates() {
13211338
13221339 if (empty ($ rel_urls ) and $ this ->jsonMode ) {
13231340 $ rel_urls = new stdClass ();
1324- }
1325-
1341+ }
1342+
13261343 return array ($ rels , $ rel_urls , $ alternates );
13271344 }
13281345
0 commit comments