|
2 | 2 |
|
3 | 3 | namespace VV\Classify; |
4 | 4 |
|
| 5 | +use Symfony\Component\DomCrawler\Crawler; |
| 6 | + |
5 | 7 | class HtmlParser implements ClassifyParser |
6 | 8 | { |
7 | 9 | public function parse(Tag $tag, string $value): string |
8 | 10 | { |
9 | | - return preg_replace( |
10 | | - $this->defineRegexPattern($tag), |
11 | | - $this->defineReplacement($tag), |
12 | | - $value |
13 | | - ); |
14 | | - } |
| 11 | + $selector = $tag->tag; |
15 | 12 |
|
16 | | - private function defineRegexPattern(Tag $tag): string |
17 | | - { |
18 | | - $pattern = ''; |
| 13 | + if (count($tag->before) > 0) { |
| 14 | + $selector = implode(' > ', $tag->before).' > '.$selector; |
19 | 15 |
|
20 | | - foreach ($tag->before as $name) { |
21 | | - $pattern .= "<{$name}[^>]*>[^<]*"; |
| 16 | + $firstPart = strtolower($tag->before[0]); |
| 17 | + |
| 18 | + // Guard against producing selectors in the form: body > body > span. |
| 19 | + if ($firstPart != 'body') { |
| 20 | + $selector = 'body > '.$selector; |
| 21 | + } |
22 | 22 | } |
23 | 23 |
|
24 | | - return "/({$pattern})(<{$tag->tag})(?! class)/iU"; |
25 | | - } |
| 24 | + $crawler = new Crawler($value); |
| 25 | + $nodes = $crawler->filter($selector); |
26 | 26 |
|
27 | | - private function defineReplacement(Tag $tag): string |
28 | | - { |
29 | | - return "$1<{$tag->tag} class=\"{$tag->classes}\""; |
| 27 | + if (count($nodes) == 0) { |
| 28 | + return $value; |
| 29 | + } |
| 30 | + |
| 31 | + foreach ($nodes as $node) { |
| 32 | + $node->setAttribute('class', $tag->classes); |
| 33 | + } |
| 34 | + |
| 35 | + // Generate the HTML with our class adjustments made. |
| 36 | + $result = $crawler->html(); |
| 37 | + |
| 38 | + // Removes the <body> and </body> tags that get added since it's a fragment. |
| 39 | + $result = substr($result, 6); |
| 40 | + |
| 41 | + return substr($result, 0, -7); |
30 | 42 | } |
31 | 43 | } |
0 commit comments