diff --git a/lib/strip_html.ts b/lib/strip_html.ts
index 492974ce..24a5abd6 100644
--- a/lib/strip_html.ts
+++ b/lib/strip_html.ts
@@ -1,6 +1,15 @@
-const STATE_PLAINTEXT = Symbol('plaintext');
-const STATE_HTML = Symbol('html');
-const STATE_COMMENT = Symbol('comment');
+const STATE_PLAINTEXT = 0; // scanner state: outside any tag; a '<' flushes the pending plain text and enters STATE_HTML
+const STATE_HTML = 1; // scanner state: inside a tag; unquoted '<' / '>' raise / lower the nesting depth
+const STATE_COMMENT = 2; // scanner state: presumably inside an HTML comment (closed by '>' preceded by '--') - confirm against the full file
+
+const CHAR_LT = 60; // '<' (numeric code, compared against String.prototype.charCodeAt results)
+const CHAR_GT = 62; // '>'
+const CHAR_QUOTE = 34; // '"'
+const CHAR_APOS = 39; // "'"
+const CHAR_DASH = 45; // '-'
+const CHAR_SPACE = 32; // ' '
+const CHAR_NEWLINE = 10; // '\n'
+const CHAR_EXCLAIM = 33; // '!'
// eslint-disable-next-line @typescript-eslint/ban-types
function striptags(html: string | String = '') {
@@ -10,114 +19,84 @@ function striptags(html: string | String = '') {
}
let state = STATE_PLAINTEXT;
- let tag_buffer = '';
let depth = 0;
- let in_quote_char = '';
+ let in_quote_char = 0;
let output = '';
+ let tag_start = -1;
+ let plain_text_start = 0;
const { length } = html;
for (let idx = 0; idx < length; idx++) {
- const char = html[idx];
+ const charCode = html.charCodeAt(idx);
if (state === STATE_PLAINTEXT) {
- switch (char) {
- case '<':
- state = STATE_HTML;
- tag_buffer = tag_buffer + char;
- break;
-
- default:
- output += char;
- break;
+ if (charCode === CHAR_LT) {
+ output += html.slice(plain_text_start, idx);
+ state = STATE_HTML;
+ tag_start = idx;
}
} else if (state === STATE_HTML) {
- switch (char) {
- case '<':
- // ignore '<' if inside a quote
- if (in_quote_char) break;
-
- // we're seeing a nested '<'
- depth++;
- break;
-
- case '>':
- // ignore '>' if inside a quote
- if (in_quote_char) {
- break;
- }
-
- // something like this is happening: '<<>>'
+ if (charCode === CHAR_LT) {
+ // ignore '<' if inside a quote
+ if (!in_quote_char) depth++;
+ } else if (charCode === CHAR_GT) {
+ // ignore '>' if inside a quote
+ if (!in_quote_char) {
if (depth) {
depth--;
-
- break;
- }
-
- // this is closing the tag in tag_buffer
- in_quote_char = '';
- state = STATE_PLAINTEXT;
- // tag_buffer += '>';
-
- tag_buffer = '';
- break;
-
- case '"':
- case '\'':
- // catch both single and double quotes
-
- if (char === in_quote_char) {
- in_quote_char = '';
} else {
- in_quote_char = in_quote_char || char;
- }
-
- tag_buffer = tag_buffer + char;
- break;
-
- case '-':
- if (tag_buffer === '':
- if (tag_buffer.slice(-2) === '--') {
- // close the comment
- state = STATE_PLAINTEXT;
- }
-
- tag_buffer = '';
- break;
-
- default:
- tag_buffer = tag_buffer + char;
- break;
+ if (charCode === CHAR_GT) {
+ // same as if (html.slice(idx - 2, idx) === '--') {
+ if (idx >= 2
+ && html.charCodeAt(idx - 1) === CHAR_DASH
+ && html.charCodeAt(idx - 2) === CHAR_DASH) {
+ // close the comment
+ state = STATE_PLAINTEXT;
+ plain_text_start = idx + 1;
+ }
+ tag_start = -1;
}
}
}
+ if (state === STATE_PLAINTEXT && plain_text_start < length) {
+ output += html.slice(plain_text_start);
+ }
+
return output;
}