Skip to content

Commit abda8bd

Browse files
committed
fix: removes comments from code containing RegExp literals, #180
1 parent fa459b1 commit abda8bd

File tree

4 files changed

+342
-47
lines changed

4 files changed

+342
-47
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog
22

3+
## 4.20.10 (2025-07-02)
4+
5+
- fix: removes comments from code containing RegExp literals, #180
6+
37
## 4.20.9 (2025-07-01)
48

59
- fix: prevent build failure when used JS-template with dynamic tag attribute, #178

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "html-bundler-webpack-plugin",
3-
"version": "4.20.9",
3+
"version": "4.20.10",
44
"description": "Generates complete single-page or multi-page website from source assets. Built-in support for Markdown, Eta, EJS, Handlebars, Nunjucks, Pug. Alternative to html-webpack-plugin.",
55
"keywords": [
66
"html",

src/Loader/Utils.js

Lines changed: 173 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -215,13 +215,13 @@ const stringifyJSON = (data) => {
215215
* const output = stripComments(input);
216216
* // => "const x = 'text // not a comment'; "
217217
*/
218-
function stripComments(code) {
218+
function stripComments2(code) {
219219
const len = code.length;
220-
let inStr = null; // "'", '"', or '`'
221-
let inRegex = false;
222-
let regexClass = false; // inside [...]
223-
let inBlockComment = false;
224-
let inLineComment = false;
220+
let inStr = null; // quote: ', ", or `
221+
let inRegex = false; // inside /.../ regex literal
222+
let inRegexClass = false; // inside [...] character class
223+
let inLineComment = false; // inside //
224+
let inBlockComment = false; // inside /* */
225225
let out = '';
226226
let i = 0;
227227

@@ -230,15 +230,15 @@ function stripComments(code) {
230230
const next = code[i + 1];
231231
const prev = code[i - 1];
232232

233-
// end of line comment
233+
// line comment end
234234
if (inLineComment && (char === '\n' || char === '\r')) {
235235
inLineComment = false;
236236
out += char;
237237
i++;
238238
continue;
239239
}
240240

241-
// end of block comment
241+
// block comment end
242242
if (inBlockComment && char === '*' && next === '/') {
243243
inBlockComment = false;
244244
i += 2;
@@ -250,29 +250,66 @@ function stripComments(code) {
250250
continue;
251251
}
252252

253-
// inside string
253+
// string start
254+
if (!inStr && (char === '"' || char === "'" || char === '`')) {
255+
inStr = char;
256+
out += char;
257+
i++;
258+
continue;
259+
}
260+
261+
// string end
254262
if (inStr) {
255263
out += char;
256264
if (char === '\\') {
257-
out += code[i + 1];
258-
i += 2;
259-
continue;
265+
// copy escaped char
266+
if (i + 1 < len) {
267+
out += code[i + 1];
268+
i += 2;
269+
continue;
270+
}
271+
} else if (char === inStr) {
272+
inStr = null;
260273
}
261-
if (char === inStr) inStr = null;
262274
i++;
263275
continue;
264276
}
265277

266-
// inside RegExp
278+
// RegExp start
279+
if (char === '/' && next !== '/' && next !== '*') {
280+
// scan backward for prev non-whitespace char
281+
let j = i - 1;
282+
while (j >= 0 && /\s/.test(code[j])) j--;
283+
const prevChar = j >= 0 ? code[j] : '';
284+
// treat as Regex if after following chars
285+
if (j < 0 || /[=({\[:~%^\-*,!&|?;<>]/.test(prevChar)) {
286+
inRegex = true;
287+
inRegexClass = false;
288+
out += char;
289+
i++;
290+
continue;
291+
}
292+
// otherwise treat as division
293+
}
294+
295+
// RegExp end
267296
if (inRegex) {
268297
out += char;
269-
if (regexClass) {
270-
if (char === ']' && prev !== '\\') regexClass = false;
298+
if (inRegexClass) {
299+
if (char === ']' && prev !== '\\') inRegexClass = false;
271300
} else {
272301
if (char === '[' && prev !== '\\') {
273-
regexClass = true;
302+
inRegexClass = true;
274303
} else if (char === '/' && prev !== '\\') {
275-
inRegex = false; // flags follow
304+
inRegex = false;
305+
// copy regex flags
306+
let k = i + 1;
307+
while (k < len && /[a-z]/i.test(code[k])) {
308+
out += code[k];
309+
k++;
310+
}
311+
i = k;
312+
continue;
276313
}
277314
}
278315
i++;
@@ -293,32 +330,142 @@ function stripComments(code) {
293330
continue;
294331
}
295332

296-
// string start
297-
if (char === '"' || char === "'" || char === '`') {
333+
out += char;
334+
i++;
335+
}
336+
337+
return out;
338+
}
339+
340+
/**
341+
* Removes all JavaScript comments (single-line and multi-line) from code,
342+
* while preserving strings and regex literals with comment-like patterns.
343+
*
344+
* @param {string} code The source code.
345+
* @returns {string} The code with all comments removed.
346+
*/
347+
function stripComments(code) {
348+
let out = '';
349+
let inStr = null; // current quote: ', ", or `
350+
let inRegex = false; // inside a /.../ regex literal
351+
let inRegexClass = false; // inside a [...] character class within regex
352+
let inLineComment = false; // inside //
353+
let inBlockComment = false; // inside /* */
354+
let prev = '';
355+
let i = 0;
356+
const len = code.length;
357+
358+
while (i < len) {
359+
const char = code[i];
360+
const next = code[i + 1];
361+
362+
// -- Line Comment --
363+
if (inLineComment) {
364+
if (char === '\n' || char === '\r') {
365+
inLineComment = false;
366+
out += char;
367+
}
368+
i++;
369+
continue;
370+
}
371+
372+
// -- Block Comment --
373+
if (inBlockComment) {
374+
if (char === '*' && next === '/') {
375+
inBlockComment = false;
376+
i += 2;
377+
} else {
378+
i++;
379+
}
380+
continue;
381+
}
382+
383+
// -- String Start --
384+
if (!inStr && !inRegex && (char === '"' || char === "'" || char === '`')) {
298385
inStr = char;
299386
out += char;
300387
i++;
301388
continue;
302389
}
303390

304-
// RegExp start (простая, но практичная эвристика)
305-
if (char === '/' && next !== '/' && next !== '*') {
391+
// -- Inside String --
392+
if (inStr) {
393+
out += char;
394+
if (char === '\\') {
395+
// Copy escaped char
396+
if (i + 1 < len) {
397+
out += code[i + 1];
398+
i += 2;
399+
} else {
400+
i++;
401+
}
402+
continue;
403+
} else if (char === inStr) {
404+
inStr = null;
405+
}
406+
i++;
407+
continue;
408+
}
409+
410+
// -- Regex Start (not after identifier, ), ], +, - or number) --
411+
if (!inRegex && char === '/' && next !== '/' && next !== '*') {
412+
// Scan backward for prev non-whitespace char
306413
let j = i - 1;
307414
while (j >= 0 && /\s/.test(code[j])) j--;
308-
const prevSym = j >= 0 ? code[j] : '';
309-
if (j < 0 || /[({\[=:+\-*,!&|?;<>]/.test(prevSym)) {
415+
const prevChar = j >= 0 ? code[j] : '';
416+
// Only treat as regex if after certain tokens or start of input
417+
if (j < 0 || /[=({\[:;,!&|?~%^<>*/]/.test(prevChar)) {
310418
inRegex = true;
311-
regexClass = false;
312419
out += char;
313420
i++;
314421
continue;
315422
}
423+
// otherwise: treat as division
424+
}
425+
426+
// -- Inside Regex --
427+
if (inRegex) {
428+
out += char;
429+
if (inRegexClass) {
430+
if (char === ']' && prev !== '\\') inRegexClass = false;
431+
} else {
432+
if (char === '[' && prev !== '\\') {
433+
inRegexClass = true;
434+
} else if (char === '/' && prev !== '\\') {
435+
inRegex = false;
436+
// Copy regex flags
437+
let k = i + 1;
438+
while (k < len && /[a-z]/i.test(code[k])) {
439+
out += code[k];
440+
k++;
441+
}
442+
i = k;
443+
continue;
444+
}
445+
}
446+
prev = char;
447+
i++;
448+
continue;
316449
}
317450

451+
// -- Line Comment Start --
452+
if (char === '/' && next === '/') {
453+
inLineComment = true;
454+
i += 2;
455+
continue;
456+
}
457+
458+
// -- Block Comment Start --
459+
if (char === '/' && next === '*') {
460+
inBlockComment = true;
461+
i += 2;
462+
continue;
463+
}
464+
465+
// -- Default: copy --
318466
out += char;
319467
i++;
320468
}
321-
322469
return out;
323470
}
324471

0 commit comments

Comments
 (0)