Skip to content

Commit 1110d1d

Browse files
committed
fix: regexp charset parsing
1 parent ed747ff commit 1110d1d

File tree

4 files changed

+78
-10
lines changed

4 files changed

+78
-10
lines changed

.changeset/funny-pens-peel.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"htmljs-parser": patch
3+
---
4+
5+
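Fix regexp charset parsing so that a `/` inside a character class (e.g. `/[abc/]/`) no longer terminates the regular expression early.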
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
1╭─ <div pattern=/[abc/] /></div>
2+
│ ││ │ ││ ││ │ ╰─ closeTagEnd(div)
3+
│ ││ │ ││ ││ ╰─ closeTagName "div"
4+
│ ││ │ ││ │╰─ closeTagStart "</"
5+
│ ││ │ ││ ╰─ openTagEnd
6+
│ ││ │ │╰─ attrValue.value "/[abc/] /"
7+
│ ││ │ ╰─ attrValue "=/[abc/] /"
8+
│ ││ ╰─ attrName "pattern"
9+
│ │╰─ tagName "div"
10+
╰─ ╰─ openTagStart
11+
2╭─ <div pattern=/[a-z/] /></div>
12+
│ ││ │ ││ ││ │ ╰─ closeTagEnd(div)
13+
│ ││ │ ││ ││ ╰─ closeTagName "div"
14+
│ ││ │ ││ │╰─ closeTagStart "</"
15+
│ ││ │ ││ ╰─ openTagEnd
16+
│ ││ │ │╰─ attrValue.value "/[a-z/] /"
17+
│ ││ │ ╰─ attrValue "=/[a-z/] /"
18+
│ ││ ╰─ attrName "pattern"
19+
│ │╰─ tagName "div"
20+
╰─ ╰─ openTagStart
21+
3╭─ <div pattern=/[^abc/] /></div>
22+
│ ││ │ ││ ││ │ ╰─ closeTagEnd(div)
23+
│ ││ │ ││ ││ ╰─ closeTagName "div"
24+
│ ││ │ ││ │╰─ closeTagStart "</"
25+
│ ││ │ ││ ╰─ openTagEnd
26+
│ ││ │ │╰─ attrValue.value "/[^abc/] /"
27+
│ ││ │ ╰─ attrValue "=/[^abc/] /"
28+
│ ││ ╰─ attrName "pattern"
29+
│ │╰─ tagName "div"
30+
╰─ ╰─ openTagStart
31+
4╭─ <div pattern=/[a\]b/] /></div>
32+
│ ││ │ ││ ││ │ ╰─ closeTagEnd(div)
33+
│ ││ │ ││ ││ ╰─ closeTagName "div"
34+
│ ││ │ ││ │╰─ closeTagStart "</"
35+
│ ││ │ ││ ╰─ openTagEnd
36+
│ ││ │ │╰─ attrValue.value "/[a\\]b/] /"
37+
│ ││ │ ╰─ attrValue "=/[a\\]b/] /"
38+
│ ││ ╰─ attrName "pattern"
39+
│ │╰─ tagName "div"
40+
╰─ ╰─ openTagStart
41+
5╭─ <div pattern=/[\w\s/] /></div>
42+
│ ││ │ ││ ││ │ ╰─ closeTagEnd(div)
43+
│ ││ │ ││ ││ ╰─ closeTagName "div"
44+
│ ││ │ ││ │╰─ closeTagStart "</"
45+
│ ││ │ ││ ╰─ openTagEnd
46+
│ ││ │ │╰─ attrValue.value "/[\\w\\s/] /"
47+
│ ││ │ ╰─ attrValue "=/[\\w\\s/] /"
48+
│ ││ ╰─ attrName "pattern"
49+
│ │╰─ tagName "div"
50+
╰─ ╰─ openTagStart
51+
6╰─
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<div pattern=/[abc/] /></div>
2+
<div pattern=/[a-z/] /></div>
3+
<div pattern=/[^abc/] /></div>
4+
<div pattern=/[a\]b/] /></div>
5+
<div pattern=/[\w\s/] /></div>

src/states/REGULAR_EXPRESSION.ts

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,23 @@ export const REGULAR_EXPRESSION: StateDefinition<RegExpMeta> = {
1818
exit() {},
1919

2020
char(code, regExp) {
21-
if (code === CODE.BACK_SLASH) {
22-
// Handle escape sequence
23-
this.pos++; // skip \
24-
} else if (code === CODE.OPEN_SQUARE_BRACKET && regExp.isInCharSet) {
25-
regExp.isInCharSet = true;
26-
} else if (code === CODE.CLOSE_SQUARE_BRACKET && regExp.isInCharSet) {
27-
regExp.isInCharSet = false;
28-
} else if (code === CODE.FORWARD_SLASH && !regExp.isInCharSet) {
29-
this.pos++; // skip /
30-
this.exitState();
21+
switch (code) {
22+
case CODE.BACK_SLASH:
23+
// Handle escape sequence
24+
this.pos++; // skip \
25+
break;
26+
case CODE.OPEN_SQUARE_BRACKET:
27+
regExp.isInCharSet = true;
28+
break;
29+
case CODE.CLOSE_SQUARE_BRACKET:
30+
regExp.isInCharSet = false;
31+
break;
32+
case CODE.FORWARD_SLASH:
33+
if (!regExp.isInCharSet) {
34+
this.pos++; // skip /
35+
this.exitState();
36+
}
37+
break;
3138
}
3239
},
3340

0 commit comments

Comments
 (0)