Skip to content

Commit a0ed5d4

Browse files
committed
More quirky char class syntax scenarios.
1 parent 21511a6 commit a0ed5d4

File tree

3 files changed

+40
-22
lines changed

3 files changed

+40
-22
lines changed

core/src/main/java/com/github/simy4/coregex/core/CoregexParser.java

Lines changed: 21 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -251,7 +251,7 @@ private Coregex literal(Context ctx) {
251251

252252
/*
253253
* <pre>{@code
254-
* set ::= '[', [ '^' ], { set-item }, ']'
254+
* set ::= '[', [ '^' ], set-item, { [ '&&' ], set-item }, ']'
255255
* }</pre>
256256
*/
257257
private Set set(Context ctx) {
@@ -261,9 +261,28 @@ private Set set(Context ctx) {
261261
ctx.match('^');
262262
negated = true;
263263
}
264-
Set.Builder set = Set.builder(ctx.flags);
264+
Set.Builder set = Set.builder(ctx.flags), intersect = null;
265+
setItem(set, ctx);
265266
while (']' != ctx.peek()) {
267+
boolean mustIntersect = false;
268+
if ('&' == ctx.peek() && '&' == ctx.peek(2)) {
269+
ctx.match('&');
270+
ctx.match('&');
271+
if (null != intersect) {
272+
set = intersect.intersect(set.build());
273+
}
274+
intersect = set;
275+
set = Set.builder(ctx.flags);
276+
mustIntersect = '[' == ctx.peek();
277+
}
266278
setItem(set, ctx);
279+
if (mustIntersect) {
280+
set = intersect.intersect(set.build());
281+
intersect = null;
282+
}
283+
}
284+
if (null != intersect) {
285+
set = intersect.intersect(set.build());
267286
}
268287
ctx.match(']');
269288
return (negated ? set.negate() : set).build();
@@ -283,16 +302,6 @@ private void setItem(Set.Builder set, Context ctx) {
283302
case '[':
284303
set.union(set(ctx));
285304
break;
286-
case '&':
287-
ctx.match('&');
288-
ch = ctx.peek();
289-
if ('&' == ch) {
290-
ctx.match('&');
291-
set.intersect(set(ctx));
292-
} else {
293-
set.single('&');
294-
}
295-
break;
296305
case '-':
297306
ctx.match('-');
298307
set.single('-');

core/src/test/scala/com/github/simy4/coregex/core/CoregexParserSuite.scala

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1027,14 +1027,21 @@ class CoregexParserSuite extends ScalaCheckSuite with CoregexArbitraries {
10271027
3,
10281028
6
10291029
) -> Pattern.compile("((?i)[a-z]+(?-i)-[A-Z]){3,6}"),
1030-
new Quantified(
1031-
set() {
1030+
new Concat(
1031+
new Quantified(
1032+
set() {
1033+
_.range('a', 'z')
1034+
.intersect(set()(_.union(set()(_.set('a', 'e', 'i', 'o', 'u').negate()).set())).set())
1035+
},
1036+
1
1037+
),
1038+
set()(_.single(']')),
1039+
set()(
10321040
_.range('a', 'z')
1033-
.range('A', 'Z')
1034-
.intersect(set()(_.set('a', 'e', 'i', 'o', 'u', 'A', 'E', 'I', 'O', 'U').negate()).set())
1035-
},
1036-
1
1037-
) -> Pattern.compile("[a-zA-Z&&[^aeiouAEIOU]]+"),
1041+
.intersect(set()(_.set('a', 'e', 'i', 'o', 'u')).set())
1042+
.intersect(set()(_.set('e', 'i')).set())
1043+
)
1044+
) -> Pattern.compile("[a-z&&[^aeiou]]+[]][a-z&&aeiou&&ei]"),
10381045
empty() -> Pattern.compile("^(?:||)$")
10391046
)
10401047
rng <- List(new RandomRNG())

core/src/test/scala/com/github/simy4/coregex/core/SetSuite.scala

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -52,9 +52,11 @@ class SetSuite extends ScalaCheckSuite with CoregexArbitraries {
5252
}
5353

5454
property("sampled should not be in intersection") {
55-
forAll { (set: Set, ch: Char) =>
56-
val intersection = Set.builder().union(set).intersect(Set.builder().single(ch).build()).build()
57-
!intersection.test(ch.toInt) :| s"$ch in [$intersection]"
55+
forAll { (left: Set, right: Set, seed1: Long, seed2: Long) =>
56+
val leftWithCommon = Set.builder().union(left).single(right.sample(seed1)).build()
57+
val intersection = Set.builder().union(leftWithCommon).intersect(right).build()
58+
val generated = intersection.sample(seed2)
59+
(leftWithCommon.test(generated.toInt) && right.test(generated.toInt)) :| s"$generated in [$intersection]"
5860
}
5961
}
6062

0 commit comments

Comments
 (0)