Skip to content

Commit a0f857c

Browse files
committed
Merge pull request #53 from mtibben/use_mb_funcs
Always use multibyte string functions
2 parents 1338bc9 + aceae14 commit a0f857c

File tree

2 files changed

+32
-18
lines changed

2 files changed

+32
-18
lines changed

composer.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,9 @@
88
},
99
"require-dev": {
1010
"phpunit/phpunit": "~4"
11+
},
12+
"suggest": {
13+
"ext-mbstring": "For best performance",
14+
"symfony/polyfill-mbstring": "If you can't install ext-mbstring"
1115
}
1216
}

src/Html2Text.php

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ class Html2Text
2323
{
2424
const ENCODING = 'UTF-8';
2525

26+
protected $htmlFuncFlags;
27+
2628
/**
2729
* Contains the HTML content to convert.
2830
*
@@ -236,6 +238,9 @@ public function __construct($html = '', $options = array())
236238

237239
$this->html = $html;
238240
$this->options = array_merge($this->options, $options);
241+
$this->htmlFuncFlags = (PHP_VERSION_ID < 50400)
242+
? ENT_COMPAT
243+
: ENT_COMPAT | ENT_HTML5;
239244
}
240245

241246
/**
@@ -318,6 +323,16 @@ public function set_base_url($baseurl)
318323
}
319324

320325
protected function convert()
326+
{
327+
$origEncoding = mb_internal_encoding();
328+
mb_internal_encoding(self::ENCODING);
329+
330+
$this->doConvert();
331+
332+
mb_internal_encoding($origEncoding);
333+
}
334+
335+
protected function doConvert()
321336
{
322337
$this->linkList = array();
323338

@@ -345,7 +360,7 @@ protected function converter(&$text)
345360
$text = preg_replace_callback($this->callbackSearch, array($this, 'pregCallback'), $text);
346361
$text = strip_tags($text);
347362
$text = preg_replace($this->entSearch, $this->entReplace, $text);
348-
$text = html_entity_decode($text, ENT_QUOTES, self::ENCODING);
363+
$text = html_entity_decode($text, $this->htmlFuncFlags, self::ENCODING);
349364

350365
// Remove unknown/unhandled entities (this cannot be done in search-and-replace block)
351366
$text = preg_replace('/&([a-zA-Z0-9]{2,6}|#[0-9]{2,4});/', '', $text);
@@ -395,7 +410,7 @@ protected function buildlinkList($link, $display, $linkOverride = null)
395410
$url = $link;
396411
} else {
397412
$url = $this->baseurl;
398-
if (substr($link, 0, 1) != '/') {
413+
if (mb_substr($link, 0, 1) != '/') {
399414
$url .= '/';
400415
}
401416
$url .= $link;
@@ -472,7 +487,7 @@ protected function convertBlockquotes(&$text)
472487
$end = $m[1];
473488
$len = $end - $taglen - $start;
474489
// Get blockquote content
475-
$body = substr($text, $start + $taglen - $diff, $len);
490+
$body = mb_substr($text, $start + $taglen - $diff, $len);
476491

477492
// Set text width
478493
$pWidth = $this->options['width'];
@@ -482,20 +497,21 @@ protected function convertBlockquotes(&$text)
482497
$this->converter($body);
483498
// Add citation markers and create PRE block
484499
$body = preg_replace('/((^|\n)>*)/', '\\1> ', trim($body));
485-
$body = '<pre>' . htmlspecialchars($body) . '</pre>';
500+
$body = '<pre>' . htmlspecialchars($body, $this->htmlFuncFlags, self::ENCODING) . '</pre>';
486501
// Re-set text width
487502
$this->options['width'] = $pWidth;
488503
// Replace content
489-
$text = substr($text, 0, $start - $diff)
490-
. $body . substr($text, $end + strlen($m[0]) - $diff);
504+
$text = mb_substr($text, 0, $start - $diff)
505+
. $body
506+
. mb_substr($text, $end + mb_strlen($m[0]) - $diff);
491507

492-
$diff += $len + $taglen + strlen($m[0]) - strlen($body);
508+
$diff += $len + $taglen + mb_strlen($m[0]) - mb_strlen($body);
493509
unset($body);
494510
}
495511
} else {
496512
if ($level == 0) {
497513
$start = $m[1];
498-
$taglen = strlen($m[0]);
514+
$taglen = mb_strlen($m[0]);
499515
}
500516
$level++;
501517
}
@@ -511,7 +527,7 @@ protected function convertBlockquotes(&$text)
511527
*/
512528
protected function pregCallback($matches)
513529
{
514-
switch (strtolower($matches[1])) {
530+
switch (mb_strtolower($matches[1])) {
515531
case 'p':
516532
// Replace newlines with spaces.
517533
$para = str_replace("\n", " ", $matches[3]);
@@ -585,15 +601,9 @@ protected function toupper($str)
585601
*/
586602
protected function strtoupper($str)
587603
{
588-
$str = html_entity_decode($str, ENT_COMPAT, self::ENCODING);
589-
590-
if (function_exists('mb_strtoupper')) {
591-
$str = mb_strtoupper($str, self::ENCODING);
592-
} else {
593-
$str = strtoupper($str);
594-
}
595-
596-
$str = htmlspecialchars($str, ENT_COMPAT, self::ENCODING);
604+
$str = html_entity_decode($str, $this->htmlFuncFlags, self::ENCODING);
605+
$str = mb_strtoupper($str);
606+
$str = htmlspecialchars($str, $this->htmlFuncFlags, self::ENCODING);
597607

598608
return $str;
599609
}

0 commit comments

Comments
 (0)