Skip to content
This repository was archived by the owner on Apr 4, 2023. It is now read-only.

Commit 1996268

Browse files
committed
dependency updates
1 parent 20d568a commit 1996268

File tree

1 file changed

+71
-121
lines changed

1 file changed

+71
-121
lines changed

vendor/mf2/mf2/Mf2/Parser.php

Lines changed: 71 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ function nestedMfPropertyNamesFromClass($class) {
167167
}
168168
}
169169
}
170-
170+
171171
foreach ($propertyNames as $property => $prefixes) {
172172
$propertyNames[$property] = array_unique($prefixes);
173173
}
@@ -338,14 +338,14 @@ public function __construct($input, $url = null, $jsonMode = false) {
338338
libxml_use_internal_errors(true);
339339
if (is_string($input)) {
340340
if (class_exists('Masterminds\\HTML5')) {
341-
$doc = new \Masterminds\HTML5(array('disable_html_ns' => true));
342-
$doc = $doc->loadHTML($input);
341+
$doc = new \Masterminds\HTML5(array('disable_html_ns' => true));
342+
$doc = $doc->loadHTML($input);
343343
} else {
344344
$doc = new DOMDocument();
345345
@$doc->loadHTML(unicodeToHtmlEntities($input));
346346
}
347347
} elseif (is_a($input, 'DOMDocument')) {
348-
$doc = $input;
348+
$doc = clone $input;
349349
} else {
350350
$doc = new DOMDocument();
351351
@$doc->loadHTML('');
@@ -402,7 +402,7 @@ private function isElementParsed(\DOMElement $e, $prefix) {
402402
if (!$this->parsed->contains($e)) {
403403
return false;
404404
}
405-
405+
406406
$prefixes = $this->parsed[$e];
407407

408408
if (!in_array($prefix, $prefixes)) {
@@ -443,101 +443,49 @@ private function resolveChildUrls(DOMElement $el) {
443443
}
444444
}
445445

446-
public function textContent(DOMElement $el) {
447-
$excludeTags = array('noframe', 'noscript', 'script', 'style', 'frames', 'frameset');
448-
449-
if (isset($el->tagName) and in_array(strtolower($el->tagName), $excludeTags)) {
450-
return '';
451-
}
452-
453-
$this->resolveChildUrls($el);
454-
455-
$clonedEl = $el->cloneNode(true);
456-
457-
foreach ($this->xpath->query('.//img', $clonedEl) as $imgEl) {
458-
$newNode = $this->doc->createTextNode($imgEl->getAttribute($imgEl->hasAttribute('alt') ? 'alt' : 'src'));
459-
$imgEl->parentNode->replaceChild($newNode, $imgEl);
460-
}
461-
462-
foreach ($excludeTags as $tagName) {
463-
foreach ($this->xpath->query(".//{$tagName}", $clonedEl) as $elToRemove) {
464-
$elToRemove->parentNode->removeChild($elToRemove);
465-
}
466-
}
467-
468-
return $this->innerText($clonedEl);
446+
/**
447+
* The following two methods implement plain text parsing.
448+
* @see https://wiki.zegnat.net/media/textparsing.html
449+
**/
450+
public function textContent(DOMElement $element)
451+
{
452+
return preg_replace(
453+
'/(^[\t\n\f\r ]+| +(?=\n)|(?<=\n) +| +(?= )|[\t\n\f\r ]+$)/',
454+
'',
455+
$this->elementToString($element)
456+
);
469457
}
470-
471-
/**
472-
* This method attempts to return a better 'innerText' representation than DOMNode::textContent
473-
*
474-
* @param DOMElement|DOMText $el
475-
* @param bool $implied when parsing for implied name for h-*, rules may be slightly different
476-
* @see: https://github.com/glennjones/microformat-shiv/blob/dev/lib/text.js
477-
*/
478-
public function innerText($el, $implied=false) {
479-
$out = '';
480-
481-
$blockLevelTags = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'hr', 'pre', 'table',
482-
'address', 'article', 'aside', 'blockquote', 'caption', 'col', 'colgroup', 'dd', 'div',
483-
'dt', 'dir', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'header', 'hgroup', 'hr',
484-
'li', 'map', 'menu', 'nav', 'optgroup', 'option', 'section', 'tbody', 'testarea',
485-
'tfoot', 'th', 'thead', 'tr', 'td', 'ul', 'ol', 'dl', 'details');
486-
487-
$excludeTags = array('noframe', 'noscript', 'script', 'style', 'frames', 'frameset');
488-
489-
// PHP DOMDocument doesn’t correctly handle whitespace around elements it doesn’t recognise.
490-
$unsupportedTags = array('data');
491-
492-
if (isset($el->tagName)) {
493-
if (in_array(strtolower($el->tagName), $excludeTags)) {
494-
return $out;
495-
} else if ($el->tagName == 'img') {
496-
if ($el->hasAttribute('alt')) {
497-
return $el->getAttribute('alt');
498-
} else if (!$implied && $el->hasAttribute('src')) {
499-
return $this->resolveUrl($el->getAttribute('src'));
500-
}
501-
} else if ($el->tagName == 'area' and $el->hasAttribute('alt')) {
502-
return $el->getAttribute('alt');
503-
} else if ($el->tagName == 'abbr' and $el->hasAttribute('title')) {
504-
return $el->getAttribute('title');
505-
}
506-
}
507-
508-
// if node is a text node get its text
509-
if (isset($el->nodeType) && $el->nodeType === 3) {
510-
$out .= $el->textContent;
511-
}
512-
513-
// get the text of the child nodes
514-
if ($el->childNodes && $el->childNodes->length > 0) {
515-
for ($j = 0; $j < $el->childNodes->length; $j++) {
516-
$text = $this->innerText($el->childNodes->item($j), $implied);
517-
if (!is_null($text)) {
518-
$out .= $text;
519-
}
520-
}
521-
}
522-
523-
if (isset($el->tagName)) {
524-
// if its a block level tag add an additional space at the end
525-
if (in_array(strtolower($el->tagName), $blockLevelTags)) {
526-
$out .= ' ';
527-
} elseif ($implied and in_array(strtolower($el->tagName), $unsupportedTags)) {
528-
$out .= ' ';
529-
} else if (strtolower($el->tagName) == 'br') {
530-
// else if its a br, replace with newline
531-
$out .= "\n";
458+
private function elementToString(DOMElement $input)
459+
{
460+
$output = '';
461+
foreach ($input->childNodes as $child) {
462+
if ($child->nodeType === XML_TEXT_NODE) {
463+
$output .= str_replace(array("\t", "\n", "\r") , ' ', $child->textContent);
464+
} else if ($child->nodeType === XML_ELEMENT_NODE) {
465+
$tagName = strtoupper($child->tagName);
466+
if (in_array($tagName, array('SCRIPT', 'STYLE'))) {
467+
continue;
468+
} else if ($tagName === 'IMG') {
469+
if ($child->hasAttribute('alt')) {
470+
$output .= ' ' . trim($child->getAttribute('alt'), "\t\n\f\r ") . ' ';
471+
} else if ($child->hasAttribute('src')) {
472+
$output .= ' ' . $this->resolveUrl(trim($child->getAttribute('src'), "\t\n\f\r ")) . ' ';
473+
}
474+
} else if ($tagName === 'BR') {
475+
$output .= "\n";
476+
} else if ($tagName === 'P') {
477+
$output .= "\n" . $this->elementToString($child);
478+
} else {
479+
$output .= $this->elementToString($child);
480+
}
481+
}
532482
}
533-
}
534-
535-
return ($out === '') ? NULL : $out;
483+
return $output;
536484
}
537485

538486
/**
539487
* This method parses the language of an element
540-
* @param DOMElement $el
488+
* @param DOMElement $el
541489
* @access public
542490
* @return string
543491
*/
@@ -547,7 +495,7 @@ public function language(DOMElement $el)
547495
if ($el->hasAttribute('lang')) {
548496
return unicodeTrim($el->getAttribute('lang'));
549497
}
550-
498+
551499
if ($el->tagName == 'html') {
552500
// we're at the <html> element and no lang; check <meta> http-equiv Content-Language
553501
foreach ( $this->xpath->query('.//meta[@http-equiv]') as $node )
@@ -558,7 +506,7 @@ public function language(DOMElement $el)
558506
}
559507
} elseif ($el->parentNode instanceof DOMElement) {
560508
// check the parent node
561-
return $this->language($el->parentNode);
509+
return $this->language($el->parentNode);
562510
}
563511

564512
return '';
@@ -648,7 +596,7 @@ public function parseP(\DOMElement $p) {
648596
} elseif (in_array($p->tagName, array('data', 'input')) and $p->hasAttribute('value')) {
649597
$pValue = $p->getAttribute('value');
650598
} else {
651-
$pValue = unicodeTrim($this->innerText($p));
599+
$pValue = $this->textContent($p);
652600
}
653601

654602
return $pValue;
@@ -670,23 +618,16 @@ public function parseU(\DOMElement $u) {
670618
$uValue = $u->getAttribute('poster');
671619
} elseif ($u->tagName == 'object' and $u->hasAttribute('data')) {
672620
$uValue = $u->getAttribute('data');
673-
}
674-
675-
if (isset($uValue)) {
676-
return $this->resolveUrl($uValue);
677-
}
678-
679-
$classTitle = $this->parseValueClassTitle($u);
680-
681-
if ($classTitle !== null) {
682-
return $classTitle;
621+
} elseif (($classTitle = $this->parseValueClassTitle($u)) !== null) {
622+
$uValue = $classTitle;
683623
} elseif (($u->tagName == 'abbr' or $u->tagName == 'link') and $u->hasAttribute('title')) {
684-
return $u->getAttribute('title');
624+
$uValue = $u->getAttribute('title');
685625
} elseif (in_array($u->tagName, array('data', 'input')) and $u->hasAttribute('value')) {
686-
return $u->getAttribute('value');
626+
$uValue = $u->getAttribute('value');
687627
} else {
688-
return unicodeTrim($this->textContent($u));
628+
$uValue = $this->textContent($u);
689629
}
630+
return $this->resolveUrl($uValue);
690631
}
691632

692633
/**
@@ -861,7 +802,7 @@ public function parseDT(\DOMElement $dt, &$dates = array(), &$impliedTimezone =
861802

862803
$dtValue = unicodeTrim($dtValue);
863804

864-
// Store the date part so that we can use it when assembling the final timestamp if the next one is missing a date part
805+
// Store the date part so that we can use it when assembling the final timestamp if the next one is missing a date part
865806
if (preg_match('/(\d{4}-\d{2}-\d{2})/', $dtValue, $matches)) {
866807
$dates[] = $matches[0];
867808
}
@@ -912,11 +853,13 @@ public function parseE(\DOMElement $e) {
912853
}
913854
$html = $e->ownerDocument->saveHtml($innerNodes);
914855
// Put the nodes back in place.
915-
$e->appendChild($innerNodes);
856+
if($innerNodes->hasChildNodes()) {
857+
$e->appendChild($innerNodes);
858+
}
916859

917860
$return = array(
918861
'html' => unicodeTrim($html),
919-
'value' => unicodeTrim($this->innerText($e)),
862+
'value' => $this->textContent($e),
920863
);
921864

922865
if($this->lang) {
@@ -970,7 +913,7 @@ public function parseH(\DOMElement $e, $is_backcompat = false, $has_nested_mf =
970913

971914
// Handle p-*
972915
foreach ($this->xpath->query('.//*[contains(concat(" ", @class) ," p-")]', $e) as $p) {
973-
// element is already parsed
916+
// element is already parsed
974917
if ($this->isElementParsed($p, 'p')) {
975918
continue;
976919
// backcompat parsing and element was not upgraded; skip it
@@ -1123,7 +1066,7 @@ public function parseH(\DOMElement $e, $is_backcompat = false, $has_nested_mf =
11231066
}
11241067
}
11251068

1126-
throw new Exception($this->innerText($e, true));
1069+
throw new Exception($this->textContent($e, true));
11271070
} catch (Exception $exc) {
11281071
$return['name'][] = unicodeTrim($exc->getMessage());
11291072
}
@@ -1175,6 +1118,11 @@ public function parseH(\DOMElement $e, $is_backcompat = false, $has_nested_mf =
11751118
$mfTypes = array_unique($mfTypes);
11761119
sort($mfTypes);
11771120

1121+
// Properties should be an object when JSON serialised
1122+
if (empty($return) and $this->jsonMode) {
1123+
$return = new stdClass();
1124+
}
1125+
11781126
// Phew. Return the final result.
11791127
$parsed = array(
11801128
'type' => $mfTypes,
@@ -1218,8 +1166,8 @@ public function parseImpliedPhoto(\DOMElement $e) {
12181166
$xpaths = array(
12191167
'./img',
12201168
'./object',
1221-
'./*[count(preceding-sibling::*)+count(following-sibling::*)=0]/img',
1222-
'./*[count(preceding-sibling::*)+count(following-sibling::*)=0]/object',
1169+
'./*[not(contains(concat(" ", @class), " h-"))]/img[count(preceding-sibling::img)+count(following-sibling::img)=0]',
1170+
'./*[not(contains(concat(" ", @class), " h-"))]/object[count(preceding-sibling::object)+count(following-sibling::object)=0]',
12231171
);
12241172

12251173
foreach ($xpaths as $path) {
@@ -1351,7 +1299,7 @@ public function parseRelsAndAlternates() {
13511299

13521300
/**
13531301
* Find rel=tag elements that don't have class=category and have an href.
1354-
* For each element, get the last non-empty URL segment. Append a <data>
1302+
* For each element, get the last non-empty URL segment. Append a <data>
13551303
* element with that value as the category. Uses the mf1 class 'category'
13561304
* which will then be upgraded to p-category during backcompat.
13571305
* @param DOMElement $el
@@ -1553,6 +1501,8 @@ public function backcompat(DOMElement $el, $context = '', $isParentMf2 = false)
15531501
$mf1Classes = array_intersect($classes, array_keys($this->classicRootMap));
15541502
}
15551503

1504+
$elHasMf2 = $this->hasRootMf2($el);
1505+
15561506
foreach ($mf1Classes as $classname) {
15571507
// special handling for specific properties
15581508
switch ( $classname )
@@ -1647,7 +1597,7 @@ public function backcompat(DOMElement $el, $context = '', $isParentMf2 = false)
16471597
}
16481598
}
16491599

1650-
if ( empty($context) && isset($this->classicRootMap[$classname]) && !$this->hasRootMf2($el) ) {
1600+
if ( empty($context) && isset($this->classicRootMap[$classname]) && !$elHasMf2 ) {
16511601
$this->addMfClasses($el, $this->classicRootMap[$classname]);
16521602
}
16531603
}
@@ -2155,8 +2105,8 @@ function resolveUrl($baseURI, $referenceURI) {
21552105

21562106
# 5.2.1 Pre-parse the Base URI
21572107
# The base URI (Base) is established according to the procedure of
2158-
# Section 5.1 and parsed into the five main components described in
2159-
# Section 3
2108+
# Section 5.1 and parsed into the five main components described in
2109+
# Section 3
21602110
$base = parseUriToComponents($baseURI);
21612111

21622112
# If base path is blank (http://example.com) then set it to /

0 commit comments

Comments
 (0)