Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 69 additions & 90 deletions lib/strip_html.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
const STATE_PLAINTEXT = Symbol('plaintext');
const STATE_HTML = Symbol('html');
const STATE_COMMENT = Symbol('comment');
const STATE_PLAINTEXT = 0;
const STATE_HTML = 1;
const STATE_COMMENT = 2;

const CHAR_LT = 60; // '<'
const CHAR_GT = 62; // '>'
const CHAR_QUOTE = 34; // '"'
const CHAR_APOS = 39; // "'"
const CHAR_DASH = 45; // '-'
const CHAR_SPACE = 32; // ' '
const CHAR_NEWLINE = 10; // '\n'
const CHAR_EXCLAIM = 33; // '!'

// eslint-disable-next-line @typescript-eslint/ban-types
function striptags(html: string | String = '') {
Expand All @@ -10,114 +19,84 @@ function striptags(html: string | String = '') {
}

let state = STATE_PLAINTEXT;
let tag_buffer = '';
let depth = 0;
let in_quote_char = '';
let in_quote_char = 0;
let output = '';
let tag_start = -1;
let plain_text_start = 0;

const { length } = html;

for (let idx = 0; idx < length; idx++) {
const char = html[idx];
const charCode = html.charCodeAt(idx);

if (state === STATE_PLAINTEXT) {
switch (char) {
case '<':
state = STATE_HTML;
tag_buffer = tag_buffer + char;
break;

default:
output += char;
break;
if (charCode === CHAR_LT) {
output += html.slice(plain_text_start, idx);
state = STATE_HTML;
tag_start = idx;
}
} else if (state === STATE_HTML) {
switch (char) {
case '<':
// ignore '<' if inside a quote
if (in_quote_char) break;

// we're seeing a nested '<'
depth++;
break;

case '>':
// ignore '>' if inside a quote
if (in_quote_char) {
break;
}

// something like this is happening: '<<>>'
if (charCode === CHAR_LT) {
// ignore '<' if inside a quote
if (!in_quote_char) depth++;
} else if (charCode === CHAR_GT) {
// ignore '>' if inside a quote
if (!in_quote_char) {
if (depth) {
depth--;

break;
}

// this is closing the tag in tag_buffer
in_quote_char = '';
state = STATE_PLAINTEXT;
// tag_buffer += '>';

tag_buffer = '';
break;

case '"':
case '\'':
// catch both single and double quotes

if (char === in_quote_char) {
in_quote_char = '';
} else {
in_quote_char = in_quote_char || char;
}

tag_buffer = tag_buffer + char;
break;

case '-':
if (tag_buffer === '<!-') {
state = STATE_COMMENT;
}

tag_buffer = tag_buffer + char;
break;

case ' ':
case '\n':
if (tag_buffer === '<') {
// this is closing the tag in tag_buffer
in_quote_char = 0;
state = STATE_PLAINTEXT;
output += '< ';
tag_buffer = '';

break;
tag_start = -1;
plain_text_start = idx + 1;
}

tag_buffer = tag_buffer + char;
break;

default:
tag_buffer = tag_buffer + char;
break;
}
} else if (charCode === CHAR_QUOTE || charCode === CHAR_APOS) {
// catch both single and double quotes

if (charCode === in_quote_char) {
in_quote_char = 0;
} else {
in_quote_char = in_quote_char || charCode;
}
} else if (charCode === CHAR_DASH) {
// same as if (html.slice(tag_start, idx) === '<!-') {
if (idx - tag_start === 3
&& html.charCodeAt(tag_start + 1) === CHAR_EXCLAIM
&& html.charCodeAt(tag_start + 2) === CHAR_DASH
) {
state = STATE_COMMENT;
}
} else if (charCode === CHAR_SPACE || charCode === CHAR_NEWLINE) {
// same as if (html.slice(tag_start, idx) === '<') {
if (idx - tag_start === 1) {
state = STATE_PLAINTEXT;
output += '< ';
tag_start = -1;
plain_text_start = idx + 1;
}
}
} else if (state === STATE_COMMENT) {
switch (char) {
case '>':
if (tag_buffer.slice(-2) === '--') {
// close the comment
state = STATE_PLAINTEXT;
}

tag_buffer = '';
break;

default:
tag_buffer = tag_buffer + char;
break;
if (charCode === CHAR_GT) {
// same as if (html.slice(idx - 2, idx) === '--') {
if (idx >= 2
&& html.charCodeAt(idx - 1) === CHAR_DASH
&& html.charCodeAt(idx - 2) === CHAR_DASH) {
// close the comment
state = STATE_PLAINTEXT;
plain_text_start = idx + 1;
}
tag_start = -1;
}
}
}

if (state === STATE_PLAINTEXT && plain_text_start < length) {
output += html.slice(plain_text_start);
}

return output;
}

Expand Down
Loading