Skip to content

Commit 8086c25

Browse files
committed
Removed Union, as a standard character class is already a union.
1 parent 8099423 commit 8086c25

File tree

8 files changed

+518
-602
lines changed

8 files changed

+518
-602
lines changed

javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassUnion.java

Lines changed: 0 additions & 22 deletions
This file was deleted.

javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,4 @@ public interface Visitor {
6767
public void visit(CharacterClassIntersection nd);
6868

6969
public void visit(CharacterClassSubtraction nd);
70-
71-
public void visit(CharacterClassUnion nd);
7270
}

javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
import com.semmle.js.ast.regexp.CharacterClassQuotedString;
1414
import com.semmle.js.ast.regexp.CharacterClassRange;
1515
import com.semmle.js.ast.regexp.CharacterClassSubtraction;
16-
import com.semmle.js.ast.regexp.CharacterClassUnion;
1716
import com.semmle.js.ast.regexp.Constant;
1817
import com.semmle.js.ast.regexp.ControlEscape;
1918
import com.semmle.js.ast.regexp.ControlLetter;
@@ -99,7 +98,6 @@ public RegExpExtractor(TrapWriter trapwriter, LocationManager locationManager) {
9998
termkinds.put("CharacterClassQuotedString", 28);
10099
termkinds.put("CharacterClassIntersection", 29);
101100
termkinds.put("CharacterClassSubtraction", 30);
102-
termkinds.put("CharacterClassUnion", 31);
103101
}
104102

105103
private static final String[] errmsgs =
@@ -374,14 +372,6 @@ public void visit(CharacterClassSubtraction nd) {
374372
for (RegExpTerm element : nd.getElements())
375373
visit(element, lbl, i++);
376374
}
377-
378-
@Override
379-
public void visit(CharacterClassUnion nd) {
380-
Label lbl = extractTerm(nd, parent, idx);
381-
int i = 0;
382-
for (RegExpTerm element : nd.getElements())
383-
visit(element, lbl, i++);
384-
}
385375
}
386376

387377
public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing, String flags) {

javascript/extractor/src/com/semmle/js/parser/RegExpParser.java

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
import com.semmle.js.ast.regexp.CharacterClassQuotedString;
1010
import com.semmle.js.ast.regexp.CharacterClassRange;
1111
import com.semmle.js.ast.regexp.CharacterClassSubtraction;
12-
import com.semmle.js.ast.regexp.CharacterClassUnion;
1312
import com.semmle.js.ast.regexp.Constant;
1413
import com.semmle.js.ast.regexp.ControlEscape;
1514
import com.semmle.js.ast.regexp.ControlLetter;
@@ -568,8 +567,7 @@ private RegExpTerm parseCharacterClass() {
568567
private enum CharacterClassType {
569568
STANDARD,
570569
INTERSECTION,
571-
SUBTRACTION,
572-
UNION
570+
SUBTRACTION
573571
}
574572

575573
// ECMA 2024 `v` flag allows nested character classes.
@@ -601,26 +599,12 @@ else if (lookahead("--")) {
601599
}
602600
}
603601

604-
boolean containsComplex = elements.stream().anyMatch(term -> term instanceof UnicodePropertyEscape ||
605-
term instanceof CharacterClassQuotedString ||
606-
term instanceof CharacterClass);
607-
608-
// Set type to UNION only if:
609-
// 1. We haven't already determined a specific type (intersection/subtraction)
610-
// 2. We have more than one element
611-
// 3. We have at least one complex element (i.e. a nested character class or a UnicodePropertyEscape)
612-
if (containsComplex && classType == CharacterClassType.STANDARD && elements.size() > 1) {
613-
classType = CharacterClassType.UNION;
614-
}
615-
616602
// Create appropriate RegExpTerm based on the detected class type
617603
switch (classType) {
618604
case INTERSECTION:
619605
return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassIntersection(loc, elements)), inverted));
620606
case SUBTRACTION:
621607
return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassSubtraction(loc, elements)), inverted));
622-
case UNION:
623-
return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassUnion(loc, elements)), inverted));
624608
case STANDARD:
625609
default:
626610
return this.finishTerm(new CharacterClass(loc, elements, inverted));

javascript/extractor/tests/es2024/output/trap/regex_nested_character_class.js.trap

Lines changed: 51 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -137,81 +137,75 @@ regexpterm(#20042,23,#20041,0,"[ [] [ [] [] ] ]")
137137
locations_default(#20043,#10000,3,2,3,17)
138138
hasLocation(#20042,#20043)
139139
#20044=*
140-
regexpterm(#20044,31,#20042,0,"[ [] [ [] [] ] ]")
141-
hasLocation(#20044,#20043)
142-
#20045=*
143-
regexpterm(#20045,14,#20044,0," ")
144-
#20046=@"loc,{#10000},3,3,3,3"
145-
locations_default(#20046,#10000,3,3,3,3)
146-
hasLocation(#20045,#20046)
147-
regexp_const_value(#20045," ")
148-
#20047=*
149-
regexpterm(#20047,23,#20044,1,"[]")
150-
#20048=@"loc,{#10000},3,4,3,5"
151-
locations_default(#20048,#10000,3,4,3,5)
152-
hasLocation(#20047,#20048)
153-
#20049=*
154-
regexpterm(#20049,14,#20044,2," ")
155-
#20050=@"loc,{#10000},3,6,3,6"
156-
locations_default(#20050,#10000,3,6,3,6)
157-
hasLocation(#20049,#20050)
158-
regexp_const_value(#20049," ")
159-
#20051=*
160-
regexpterm(#20051,23,#20044,3,"[ [] [] ]")
161-
#20052=@"loc,{#10000},3,7,3,15"
162-
locations_default(#20052,#10000,3,7,3,15)
163-
hasLocation(#20051,#20052)
164-
#20053=*
165-
regexpterm(#20053,31,#20051,0,"[ [] [] ]")
166-
hasLocation(#20053,#20052)
140+
regexpterm(#20044,14,#20042,0," ")
141+
#20045=@"loc,{#10000},3,3,3,3"
142+
locations_default(#20045,#10000,3,3,3,3)
143+
hasLocation(#20044,#20045)
144+
regexp_const_value(#20044," ")
145+
#20046=*
146+
regexpterm(#20046,23,#20042,1,"[]")
147+
#20047=@"loc,{#10000},3,4,3,5"
148+
locations_default(#20047,#10000,3,4,3,5)
149+
hasLocation(#20046,#20047)
150+
#20048=*
151+
regexpterm(#20048,14,#20042,2," ")
152+
#20049=@"loc,{#10000},3,6,3,6"
153+
locations_default(#20049,#10000,3,6,3,6)
154+
hasLocation(#20048,#20049)
155+
regexp_const_value(#20048," ")
156+
#20050=*
157+
regexpterm(#20050,23,#20042,3,"[ [] [] ]")
158+
#20051=@"loc,{#10000},3,7,3,15"
159+
locations_default(#20051,#10000,3,7,3,15)
160+
hasLocation(#20050,#20051)
161+
#20052=*
162+
regexpterm(#20052,14,#20050,0," ")
163+
#20053=@"loc,{#10000},3,8,3,8"
164+
locations_default(#20053,#10000,3,8,3,8)
165+
hasLocation(#20052,#20053)
166+
regexp_const_value(#20052," ")
167167
#20054=*
168-
regexpterm(#20054,14,#20053,0," ")
169-
#20055=@"loc,{#10000},3,8,3,8"
170-
locations_default(#20055,#10000,3,8,3,8)
168+
regexpterm(#20054,23,#20050,1,"[]")
169+
#20055=@"loc,{#10000},3,9,3,10"
170+
locations_default(#20055,#10000,3,9,3,10)
171171
hasLocation(#20054,#20055)
172-
regexp_const_value(#20054," ")
173172
#20056=*
174-
regexpterm(#20056,23,#20053,1,"[]")
175-
#20057=@"loc,{#10000},3,9,3,10"
176-
locations_default(#20057,#10000,3,9,3,10)
173+
regexpterm(#20056,14,#20050,2," ")
174+
#20057=@"loc,{#10000},3,11,3,11"
175+
locations_default(#20057,#10000,3,11,3,11)
177176
hasLocation(#20056,#20057)
177+
regexp_const_value(#20056," ")
178178
#20058=*
179-
regexpterm(#20058,14,#20053,2," ")
180-
#20059=@"loc,{#10000},3,11,3,11"
181-
locations_default(#20059,#10000,3,11,3,11)
179+
regexpterm(#20058,23,#20050,3,"[]")
180+
#20059=@"loc,{#10000},3,12,3,13"
181+
locations_default(#20059,#10000,3,12,3,13)
182182
hasLocation(#20058,#20059)
183-
regexp_const_value(#20058," ")
184183
#20060=*
185-
regexpterm(#20060,23,#20053,3,"[]")
186-
#20061=@"loc,{#10000},3,12,3,13"
187-
locations_default(#20061,#10000,3,12,3,13)
184+
regexpterm(#20060,14,#20050,4," ")
185+
#20061=@"loc,{#10000},3,14,3,14"
186+
locations_default(#20061,#10000,3,14,3,14)
188187
hasLocation(#20060,#20061)
188+
regexp_const_value(#20060," ")
189189
#20062=*
190-
regexpterm(#20062,14,#20053,4," ")
191-
#20063=@"loc,{#10000},3,14,3,14"
192-
locations_default(#20063,#10000,3,14,3,14)
190+
regexpterm(#20062,14,#20042,4," ")
191+
#20063=@"loc,{#10000},3,16,3,16"
192+
locations_default(#20063,#10000,3,16,3,16)
193193
hasLocation(#20062,#20063)
194194
regexp_const_value(#20062," ")
195195
#20064=*
196-
regexpterm(#20064,14,#20044,4," ")
197-
#20065=@"loc,{#10000},3,16,3,16"
198-
locations_default(#20065,#10000,3,16,3,16)
196+
entry_cfg_node(#20064,#20001)
197+
#20065=@"loc,{#10000},1,1,1,0"
198+
locations_default(#20065,#10000,1,1,1,0)
199199
hasLocation(#20064,#20065)
200-
regexp_const_value(#20064," ")
201200
#20066=*
202-
entry_cfg_node(#20066,#20001)
203-
#20067=@"loc,{#10000},1,1,1,0"
204-
locations_default(#20067,#10000,1,1,1,0)
205-
hasLocation(#20066,#20067)
206-
#20068=*
207-
exit_cfg_node(#20068,#20001)
208-
hasLocation(#20068,#20023)
201+
exit_cfg_node(#20066,#20001)
202+
hasLocation(#20066,#20023)
209203
successor(#20040,#20041)
210-
successor(#20041,#20068)
204+
successor(#20041,#20066)
211205
successor(#20032,#20033)
212206
successor(#20033,#20040)
213207
successor(#20025,#20027)
214208
successor(#20027,#20032)
215-
successor(#20066,#20025)
209+
successor(#20064,#20025)
216210
numlines(#10000,3,3,1)
217211
filetype(#10000,"javascript")

0 commit comments

Comments
 (0)