Skip to content

Commit d484bce

Browse files
authored
Merge pull request #89 from on2/strikethrough
Handle <del> and <ins> tags
2 parents 0acb494 + 0fa5e5e commit d484bce

File tree

3 files changed

+57
-0
lines changed

3 files changed

+57
-0
lines changed

src/Html2Text.php

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ class Html2Text
6161
'/<style\b[^>]*>.*?<\/style>/i', // <style>s -- which strip_tags supposedly has problems with
6262
'/<i\b[^>]*>(.*?)<\/i>/i', // <i>
6363
'/<em\b[^>]*>(.*?)<\/em>/i', // <em>
64+
'/<ins\b[^>]*>(.*?)<\/ins>/i', // <ins>
6465
'/(<ul\b[^>]*>|<\/ul>)/i', // <ul> and </ul>
6566
'/(<ol\b[^>]*>|<\/ol>)/i', // <ol> and </ol>
6667
'/(<dl\b[^>]*>|<\/dl>)/i', // <dl> and </dl>
@@ -91,6 +92,7 @@ class Html2Text
9192
'', // <style>s -- which strip_tags supposedly has problems with
9293
'_\\1_', // <i>
9394
'_\\1_', // <em>
95+
'_\\1_', // <ins>
9496
"\n\n", // <ul> and </ul>
9597
"\n\n", // <ol> and </ol>
9698
"\n\n", // <dl> and </dl>
@@ -148,6 +150,7 @@ class Html2Text
148150
'/<(br)[^>]*>[ ]*/i', // <br> with leading whitespace after the newline.
149151
'/<(b)( [^>]*)?>(.*?)<\/b>/i', // <b>
150152
'/<(strong)( [^>]*)?>(.*?)<\/strong>/i', // <strong>
153+
'/<(del)( [^>]*)?>(.*?)<\/del>/i', // <del>
151154
'/<(th)( [^>]*)?>(.*?)<\/th>/i', // <th> and </th>
152155
'/<(a) [^>]*href=("|\')([^"\']+)\2([^>]*)>(.*?)<\/a>/i' // <a href="">
153156
);
@@ -226,6 +229,8 @@ class Html2Text
226229
'width' => 70, // Maximum width of the formatted text, in columns.
227230
// Set this value to 0 (or less) to ignore word wrapping
228231
// and not constrain text to a fixed-width column.
232+
233+
'strikethough_del' => false, // use a combining character so that <del> appears struck-through.
229234
);
230235

231236
private function legacyConstruct($html = '', $fromFile = false, array $options = array())
@@ -575,6 +580,8 @@ protected function pregCallback($matches)
575580
case 'b':
576581
case 'strong':
577582
return $this->toupper($matches[3]);
583+
case 'del':
584+
return $this->tostrike($matches[3]);
578585
case 'th':
579586
return $this->toupper("\t\t" . $matches[3] . "\n");
580587
case 'h':
@@ -640,4 +647,24 @@ protected function strtoupper($str)
640647

641648
return $str;
642649
}
650+
651+
/**
 * Helper function for DEL conversion.
 *
 * Appends U+0336 (COMBINING LONG STROKE OVERLAY) after every character of
 * the input so that the text renders as struck-through in plain text.
 * The input is returned unchanged unless the class encoding is UTF-8 and
 * the 'strikethough_del' option is exactly true.
 *
 * @param string $str Content of the <del> element
 * @return string Converted text
 */
protected function tostrike($str)
{
    if (self::ENCODING !== 'UTF-8' || $this->options['strikethough_del'] !== true) {
        return $str;
    }

    // UTF-8 byte sequence for U+0336; constant, so built once outside the loop.
    $combiningChr = "\xCC\xB6";

    // Pass the encoding explicitly: relying on mb_internal_encoding() would
    // split multibyte characters whenever the runtime default is not UTF-8.
    $length = mb_strlen($str, self::ENCODING);

    $rtn = '';
    for ($i = 0; $i < $length; $i++) {
        $rtn .= mb_substr($str, $i, 1, self::ENCODING) . $combiningChr;
    }

    return $rtn;
}
643670
}

test/DelTest.php

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<?php
2+
3+
namespace Html2Text;
4+
5+
/**
 * Verifies that <del> content is rendered with a combining strike-through
 * character when the 'strikethough_del' option is enabled.
 */
class DelTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Each character inside <del> must be followed by the combining overlay,
     * including multibyte characters; text outside <del> stays untouched.
     */
    public function testDel()
    {
        $converter = new Html2Text(
            'My <del>Résumé</del> Curriculum Vitæ',
            array('strikethough_del' => true)
        );

        $this->assertEquals('My R̶é̶s̶u̶m̶é̶ Curriculum Vitæ', $converter->getText());
    }
}

test/InsTest.php

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<?php
2+
3+
namespace Html2Text;
4+
5+
/**
 * Verifies that <ins> content is emphasised with surrounding underscores.
 */
class InsTest extends \PHPUnit_Framework_TestCase
{
    /**
     * With default options, inserted text is wrapped in underscores.
     */
    public function testIns()
    {
        $converter = new Html2Text('This is <ins>inserted</ins>');

        $this->assertEquals('This is _inserted_', $converter->getText());
    }
}

0 commit comments

Comments
 (0)